xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Misc Tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderMiscTests.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 #include "vktTestCaseUtil.hpp"
29 
30 #include "vkBuilderUtil.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkImageUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "tcuImageCompare.hpp"
40 #include "tcuTexture.hpp"
41 #include "tcuTextureUtil.hpp"
42 #include "tcuMaybe.hpp"
43 #include "tcuStringTemplate.hpp"
44 #include "tcuTestLog.hpp"
45 
46 #include "deRandom.hpp"
47 
48 #include <cstdint>
49 #include <memory>
50 #include <utility>
51 #include <vector>
52 #include <string>
53 #include <sstream>
54 #include <map>
55 #include <limits>
56 
57 namespace vkt
58 {
59 namespace MeshShader
60 {
61 
62 namespace
63 {
64 
65 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
66 
67 using namespace vk;
68 
69 // Output images will use this format.
getOutputFormat()70 VkFormat getOutputFormat()
71 {
72     return VK_FORMAT_R8G8B8A8_UNORM;
73 }
74 
// Per-channel comparison threshold that is reasonable for the output format above.
float getCompareThreshold()
{
    // Chosen so that 1/256 < threshold < 2/256.
    constexpr float kCompareThreshold = 0.005f;
    return kCompareThreshold;
}
80 
81 // Check mesh shader support.
genericCheckSupport(Context & context,bool requireTaskShader,bool requireVertexStores)82 void genericCheckSupport(Context &context, bool requireTaskShader, bool requireVertexStores)
83 {
84     checkTaskMeshShaderSupportNV(context, requireTaskShader, true);
85 
86     if (requireVertexStores)
87     {
88         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
89     }
90 }
91 
92 struct MiscTestParams
93 {
94     tcu::Maybe<uint32_t> taskCount;
95     uint32_t meshCount;
96 
97     uint32_t width;
98     uint32_t height;
99 
MiscTestParamsvkt::MeshShader::__anon5941fe3f0111::MiscTestParams100     MiscTestParams(const tcu::Maybe<uint32_t> &taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_)
101         : taskCount(taskCount_)
102         , meshCount(meshCount_)
103         , width(width_)
104         , height(height_)
105     {
106     }
107 
108     // Makes the class polymorphic and allows the right destructor to be used for subclasses.
~MiscTestParamsvkt::MeshShader::__anon5941fe3f0111::MiscTestParams109     virtual ~MiscTestParams()
110     {
111     }
112 
needsTaskShadervkt::MeshShader::__anon5941fe3f0111::MiscTestParams113     bool needsTaskShader() const
114     {
115         return static_cast<bool>(taskCount);
116     }
117 
drawCountvkt::MeshShader::__anon5941fe3f0111::MiscTestParams118     uint32_t drawCount() const
119     {
120         if (needsTaskShader())
121             return taskCount.get();
122         return meshCount;
123     }
124 };
125 
126 using ParamsPtr = std::unique_ptr<MiscTestParams>;
127 
128 class MeshShaderMiscCase : public vkt::TestCase
129 {
130 public:
131     MeshShaderMiscCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params);
~MeshShaderMiscCase(void)132     virtual ~MeshShaderMiscCase(void)
133     {
134     }
135 
136     void checkSupport(Context &context) const override;
137     void initPrograms(vk::SourceCollections &programCollection) const override;
138 
139 protected:
140     std::unique_ptr<MiscTestParams> m_params;
141 };
142 
MeshShaderMiscCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)143 MeshShaderMiscCase::MeshShaderMiscCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
144     : vkt::TestCase(testCtx, name)
145     , m_params(params.release())
146 {
147 }
148 
checkSupport(Context & context) const149 void MeshShaderMiscCase::checkSupport(Context &context) const
150 {
151     genericCheckSupport(context, m_params->needsTaskShader(), /*requireVertexStores*/ false);
152 }
153 
154 // Adds the generic fragment shader. To be called by subclasses.
initPrograms(vk::SourceCollections & programCollection) const155 void MeshShaderMiscCase::initPrograms(vk::SourceCollections &programCollection) const
156 {
157     std::string frag = "#version 450\n"
158                        "#extension GL_NV_mesh_shader : enable\n"
159                        "\n"
160                        "layout (location=0) in perprimitiveNV vec4 primitiveColor;\n"
161                        "layout (location=0) out vec4 outColor;\n"
162                        "\n"
163                        "void main ()\n"
164                        "{\n"
165                        "    outColor = primitiveColor;\n"
166                        "}\n";
167     programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
168 }
169 
170 class MeshShaderMiscInstance : public vkt::TestInstance
171 {
172 public:
MeshShaderMiscInstance(Context & context,const MiscTestParams * params)173     MeshShaderMiscInstance(Context &context, const MiscTestParams *params)
174         : vkt::TestInstance(context)
175         , m_params(params)
176         , m_referenceLevel()
177     {
178     }
179 
180     void generateSolidRefLevel(const tcu::Vec4 &color, std::unique_ptr<tcu::TextureLevel> &output);
181     virtual void generateReferenceLevel() = 0;
182 
183     virtual bool verifyResult(const tcu::ConstPixelBufferAccess &resultAccess,
184                               const tcu::TextureLevel &referenceLevel) const;
185     virtual bool verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const;
186     tcu::TestStatus iterate() override;
187 
188 protected:
189     const MiscTestParams *m_params;
190     std::unique_ptr<tcu::TextureLevel> m_referenceLevel;
191 };
192 
generateSolidRefLevel(const tcu::Vec4 & color,std::unique_ptr<tcu::TextureLevel> & output)193 void MeshShaderMiscInstance::generateSolidRefLevel(const tcu::Vec4 &color, std::unique_ptr<tcu::TextureLevel> &output)
194 {
195     const auto format    = getOutputFormat();
196     const auto tcuFormat = mapVkFormat(format);
197 
198     const auto iWidth  = static_cast<int>(m_params->width);
199     const auto iHeight = static_cast<int>(m_params->height);
200 
201     output.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
202 
203     const auto access = output->getAccess();
204 
205     // Fill with solid color.
206     tcu::clear(access, color);
207 }
208 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const209 bool MeshShaderMiscInstance::verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const
210 {
211     return verifyResult(resultAccess, *m_referenceLevel);
212 }
213 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess,const tcu::TextureLevel & referenceLevel) const214 bool MeshShaderMiscInstance::verifyResult(const tcu::ConstPixelBufferAccess &resultAccess,
215                                           const tcu::TextureLevel &referenceLevel) const
216 {
217     const auto referenceAccess = referenceLevel.getAccess();
218 
219     const auto refWidth  = referenceAccess.getWidth();
220     const auto refHeight = referenceAccess.getHeight();
221     const auto refDepth  = referenceAccess.getDepth();
222 
223     const auto resWidth  = resultAccess.getWidth();
224     const auto resHeight = resultAccess.getHeight();
225     const auto resDepth  = resultAccess.getDepth();
226 
227     DE_ASSERT(resWidth == refWidth || resHeight == refHeight || resDepth == refDepth);
228 
229     // For release builds.
230     DE_UNREF(refWidth);
231     DE_UNREF(refHeight);
232     DE_UNREF(refDepth);
233     DE_UNREF(resWidth);
234     DE_UNREF(resHeight);
235     DE_UNREF(resDepth);
236 
237     const auto outputFormat   = getOutputFormat();
238     const auto expectedFormat = mapVkFormat(outputFormat);
239     const auto resFormat      = resultAccess.getFormat();
240     const auto refFormat      = referenceAccess.getFormat();
241 
242     DE_ASSERT(resFormat == expectedFormat && refFormat == expectedFormat);
243 
244     // For release builds
245     DE_UNREF(expectedFormat);
246     DE_UNREF(resFormat);
247     DE_UNREF(refFormat);
248 
249     auto &log            = m_context.getTestContext().getLog();
250     const auto threshold = getCompareThreshold();
251     const tcu::Vec4 thresholdVec(threshold, threshold, threshold, threshold);
252 
253     return tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, thresholdVec,
254                                       tcu::COMPARE_LOG_ON_ERROR);
255 }
256 
iterate()257 tcu::TestStatus MeshShaderMiscInstance::iterate()
258 {
259     const auto &vkd       = m_context.getDeviceInterface();
260     const auto device     = m_context.getDevice();
261     auto &alloc           = m_context.getDefaultAllocator();
262     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
263     const auto queue      = m_context.getUniversalQueue();
264 
265     const auto imageFormat = getOutputFormat();
266     const auto tcuFormat   = mapVkFormat(imageFormat);
267     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
268     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
269 
270     const VkImageCreateInfo colorBufferInfo = {
271         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
272         nullptr,                             // const void* pNext;
273         0u,                                  // VkImageCreateFlags flags;
274         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
275         imageFormat,                         // VkFormat format;
276         imageExtent,                         // VkExtent3D extent;
277         1u,                                  // uint32_t mipLevels;
278         1u,                                  // uint32_t arrayLayers;
279         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
280         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
281         imageUsage,                          // VkImageUsageFlags usage;
282         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
283         0u,                                  // uint32_t queueFamilyIndexCount;
284         nullptr,                             // const uint32_t* pQueueFamilyIndices;
285         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
286     };
287 
288     // Create color image and view.
289     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
290     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
291     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
292     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
293 
294     // Create a memory buffer for verification.
295     const auto verificationBufferSize =
296         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
297     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
298     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
299 
300     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
301     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
302     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
303 
304     // Pipeline layout.
305     const auto pipelineLayout = makePipelineLayout(vkd, device);
306 
307     // Shader modules.
308     const auto &binaries = m_context.getBinaryCollection();
309     const auto hasTask   = binaries.contains("task");
310 
311     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
312     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
313 
314     Move<VkShaderModule> taskShader;
315     if (hasTask)
316         taskShader = createShaderModule(vkd, device, binaries.get("task"));
317 
318     // Render pass.
319     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
320 
321     // Framebuffer.
322     const auto framebuffer =
323         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
324 
325     // Viewport and scissor.
326     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
327     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
328 
329     // Color blending.
330     const auto colorWriteMask =
331         (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
332     const VkPipelineColorBlendAttachmentState blendAttState = {
333         VK_TRUE,             // VkBool32 blendEnable;
334         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
335         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstColorBlendFactor;
336         VK_BLEND_OP_ADD,     // VkBlendOp colorBlendOp;
337         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
338         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstAlphaBlendFactor;
339         VK_BLEND_OP_ADD,     // VkBlendOp alphaBlendOp;
340         colorWriteMask,      // VkColorComponentFlags colorWriteMask;
341     };
342 
343     const VkPipelineColorBlendStateCreateInfo colorBlendInfo = {
344         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
345         nullptr,                                                  // const void* pNext;
346         0u,                                                       // VkPipelineColorBlendStateCreateFlags flags;
347         VK_FALSE,                                                 // VkBool32 logicOpEnable;
348         VK_LOGIC_OP_OR,                                           // VkLogicOp logicOp;
349         1u,                                                       // uint32_t attachmentCount;
350         &blendAttState,           // const VkPipelineColorBlendAttachmentState* pAttachments;
351         {0.0f, 0.0f, 0.0f, 0.0f}, // float blendConstants[4];
352     };
353 
354     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
355                                                fragShader.get(), renderPass.get(), viewports, scissors, 0u /*subpass*/,
356                                                nullptr, nullptr, nullptr, &colorBlendInfo);
357 
358     // Command pool and buffer.
359     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
360     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
361     const auto cmdBuffer    = cmdBufferPtr.get();
362 
363     beginCommandBuffer(vkd, cmdBuffer);
364 
365     // Run pipeline.
366     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
367     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
368     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
369     vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
370     endRenderPass(vkd, cmdBuffer);
371 
372     // Copy color buffer to verification buffer.
373     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
374     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
375     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
376     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
377 
378     const auto preCopyBarrier =
379         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
380                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
381     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
382     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
383 
384     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
385                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
386     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
387                              verificationBuffer.get(), 1u, &copyRegion);
388     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
389                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
390 
391     endCommandBuffer(vkd, cmdBuffer);
392     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
393 
394     // Generate reference image and compare results.
395     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
396     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
397 
398     generateReferenceLevel();
399     invalidateAlloc(vkd, device, verificationBufferAlloc);
400     if (!verifyResult(verificationAccess))
401         TCU_FAIL("Result does not match reference; check log for details");
402 
403     return tcu::TestStatus::pass("Pass");
404 }
405 
406 // Verify passing more complex data between the task and mesh shaders.
407 class ComplexTaskDataCase : public MeshShaderMiscCase
408 {
409 public:
ComplexTaskDataCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)410     ComplexTaskDataCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
411         : MeshShaderMiscCase(testCtx, name, std::move(params))
412     {
413     }
414 
415     void initPrograms(vk::SourceCollections &programCollection) const override;
416     TestInstance *createInstance(Context &context) const override;
417 };
418 
419 class ComplexTaskDataInstance : public MeshShaderMiscInstance
420 {
421 public:
ComplexTaskDataInstance(Context & context,const MiscTestParams * params)422     ComplexTaskDataInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
423     {
424     }
425 
426     void generateReferenceLevel() override;
427 };
428 
generateReferenceLevel()429 void ComplexTaskDataInstance::generateReferenceLevel()
430 {
431     const auto format    = getOutputFormat();
432     const auto tcuFormat = mapVkFormat(format);
433 
434     const auto iWidth  = static_cast<int>(m_params->width);
435     const auto iHeight = static_cast<int>(m_params->height);
436 
437     const auto halfWidth  = iWidth / 2;
438     const auto halfHeight = iHeight / 2;
439 
440     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
441 
442     const auto access = m_referenceLevel->getAccess();
443 
444     // Each image quadrant gets a different color.
445     for (int y = 0; y < iHeight; ++y)
446         for (int x = 0; x < iWidth; ++x)
447         {
448             const float red     = ((y < halfHeight) ? 0.0f : 1.0f);
449             const float green   = ((x < halfWidth) ? 0.0f : 1.0f);
450             const auto refColor = tcu::Vec4(red, green, 1.0f, 1.0f);
451             access.setPixel(refColor, x, y);
452         }
453 }
454 
initPrograms(vk::SourceCollections & programCollection) const455 void ComplexTaskDataCase::initPrograms(vk::SourceCollections &programCollection) const
456 {
457     // Add the generic fragment shader.
458     MeshShaderMiscCase::initPrograms(programCollection);
459 
460     const std::string taskDataDeclTemplate = "struct RowId {\n"
461                                              "    uint id;\n"
462                                              "};\n"
463                                              "\n"
464                                              "struct WorkGroupData {\n"
465                                              "    float WorkGroupIdPlusOnex1000Iota[10];\n"
466                                              "    RowId rowId;\n"
467                                              "    uvec3 WorkGroupIdPlusOnex2000Iota;\n"
468                                              "    vec2  WorkGroupIdPlusOnex3000Iota;\n"
469                                              "};\n"
470                                              "\n"
471                                              "struct ExternalData {\n"
472                                              "    float OneMillion;\n"
473                                              "    uint  TwoMillion;\n"
474                                              "    WorkGroupData workGroupData;\n"
475                                              "};\n"
476                                              "\n"
477                                              "${INOUT} taskNV TaskData {\n"
478                                              "    uint yes;\n"
479                                              "    ExternalData externalData;\n"
480                                              "} td;\n";
481     const tcu::StringTemplate taskDataDecl(taskDataDeclTemplate);
482 
483     {
484         std::map<std::string, std::string> taskMap;
485         taskMap["INOUT"] = "out";
486         std::ostringstream task;
487         task << "#version 450\n"
488              << "#extension GL_NV_mesh_shader : enable\n"
489              << "\n"
490              << "layout (local_size_x=1) in;\n"
491              << "\n"
492              << taskDataDecl.specialize(taskMap) << "\n"
493              << "void main ()\n"
494              << "{\n"
495              << "    gl_TaskCountNV = 2u;\n"
496              << "    td.yes = 1u;\n"
497              << "    td.externalData.OneMillion = 1000000.0;\n"
498              << "    td.externalData.TwoMillion = 2000000u;\n"
499              << "    for (uint i = 0; i < 10; i++) {\n"
500              << "        td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] = float((gl_WorkGroupID.x + 1u) "
501                 "* 1000 + i);\n"
502              << "    }\n"
503              << "    {\n"
504              << "        uint baseVal = (gl_WorkGroupID.x + 1u) * 2000;\n"
505              << "        td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota = uvec3(baseVal, baseVal + 1, "
506                 "baseVal + 2);\n"
507              << "    }\n"
508              << "    {\n"
509              << "        uint baseVal = (gl_WorkGroupID.x + 1u) * 3000;\n"
510              << "        td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota = vec2(baseVal, baseVal + 1);\n"
511              << "    }\n"
512              << "    td.externalData.workGroupData.rowId.id = gl_WorkGroupID.x;\n"
513              << "}\n";
514         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
515     }
516 
517     {
518         std::map<std::string, std::string> meshMap;
519         meshMap["INOUT"] = "in";
520         std::ostringstream mesh;
521         mesh
522             << "#version 450\n"
523             << "#extension GL_NV_mesh_shader : enable\n"
524             << "\n"
525             << "layout(local_size_x=2) in;\n"
526             << "layout(triangles) out;\n"
527             << "layout(max_vertices=4, max_primitives=2) out;\n"
528             << "\n"
529             << "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
530             << "\n"
531             << taskDataDecl.specialize(meshMap) << "\n"
532             << "void main ()\n"
533             << "{\n"
534             << "    bool dataOK = true;\n"
535             << "    dataOK = (dataOK && (td.yes == 1u));\n"
536             << "    dataOK = (dataOK && (td.externalData.OneMillion == 1000000.0 && td.externalData.TwoMillion == "
537                "2000000u));\n"
538             << "    uint rowId = td.externalData.workGroupData.rowId.id;\n"
539             << "    dataOK = (dataOK && (rowId == 0u || rowId == 1u));\n"
540             << "\n"
541             << "    {\n"
542             << "        uint baseVal = (rowId + 1u) * 1000u;\n"
543             << "        for (uint i = 0; i < 10; i++) {\n"
544             << "            if (td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] != float(baseVal + i)) {\n"
545             << "                dataOK = false;\n"
546             << "                break;\n"
547             << "            }\n"
548             << "        }\n"
549             << "    }\n"
550             << "\n"
551             << "    {\n"
552             << "        uint baseVal = (rowId + 1u) * 2000;\n"
553             << "        uvec3 expected = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
554             << "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota != expected) {\n"
555             << "            dataOK = false;\n"
556             << "        }\n"
557             << "    }\n"
558             << "\n"
559             << "    {\n"
560             << "        uint baseVal = (rowId + 1u) * 3000;\n"
561             << "        vec2 expected = vec2(baseVal, baseVal + 1);\n"
562             << "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota != expected) {\n"
563             << "            dataOK = false;\n"
564             << "        }\n"
565             << "    }\n"
566             << "\n"
567             << "    uint columnId = gl_WorkGroupID.x;\n"
568             << "\n"
569             << "    if (dataOK) {\n"
570             << "        gl_PrimitiveCountNV = 2u;\n"
571             << "    }\n"
572             << "    else {\n"
573             << "        gl_PrimitiveCountNV = 0u;\n"
574             << "        return;\n"
575             << "    }\n"
576             << "\n"
577             << "    const vec4 outColor = vec4(rowId, columnId, 1.0f, 1.0f);\n"
578             << "    triangleColor[0] = outColor;\n"
579             << "    triangleColor[1] = outColor;\n"
580             << "\n"
581             << "    // Each local invocation will generate two points and one triangle from the quad.\n"
582             << "    // The first local invocation will generate the top quad vertices.\n"
583             << "    // The second invocation will generate the two bottom vertices.\n"
584             << "    vec4 left  = vec4(0.0, 0.0, 0.0, 1.0);\n"
585             << "    vec4 right = vec4(1.0, 0.0, 0.0, 1.0);\n"
586             << "\n"
587             << "    float localInvocationOffsetY = float(gl_LocalInvocationID.x);\n"
588             << "    left.y  += localInvocationOffsetY;\n"
589             << "    right.y += localInvocationOffsetY;\n"
590             << "\n"
591             << "    // The code above creates a quad from (0, 0) to (1, 1) but we need to offset it\n"
592             << "    // in X and/or Y depending on the row and column, to place it in other quadrants.\n"
593             << "    float quadrantOffsetX = float(int(columnId) - 1);\n"
594             << "    float quadrantOffsetY = float(int(rowId) - 1);\n"
595             << "\n"
596             << "    left.x  += quadrantOffsetX;\n"
597             << "    right.x += quadrantOffsetX;\n"
598             << "\n"
599             << "    left.y  += quadrantOffsetY;\n"
600             << "    right.y += quadrantOffsetY;\n"
601             << "\n"
602             << "    uint baseVertexId = 2*gl_LocalInvocationID.x;\n"
603             << "    gl_MeshVerticesNV[baseVertexId + 0].gl_Position = left;\n"
604             << "    gl_MeshVerticesNV[baseVertexId + 1].gl_Position = right;\n"
605             << "\n"
606             << "    uint baseIndexId = 3*gl_LocalInvocationID.x;\n"
607             << "    // 0,1,2 or 1,2,3 (note: triangles alternate front face this way)\n"
608             << "    gl_PrimitiveIndicesNV[baseIndexId + 0] = 0 + gl_LocalInvocationID.x;\n"
609             << "    gl_PrimitiveIndicesNV[baseIndexId + 1] = 1 + gl_LocalInvocationID.x;\n"
610             << "    gl_PrimitiveIndicesNV[baseIndexId + 2] = 2 + gl_LocalInvocationID.x;\n"
611             << "}\n";
612         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
613     }
614 }
615 
createInstance(Context & context) const616 TestInstance *ComplexTaskDataCase::createInstance(Context &context) const
617 {
618     return new ComplexTaskDataInstance(context, m_params.get());
619 }
620 
621 // Verify drawing a single point.
622 class SinglePointCase : public MeshShaderMiscCase
623 {
624 public:
SinglePointCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)625     SinglePointCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
626         : MeshShaderMiscCase(testCtx, name, std::move(params))
627     {
628     }
629 
630     void initPrograms(vk::SourceCollections &programCollection) const override;
631     TestInstance *createInstance(Context &context) const override;
632 };
633 
634 class SinglePointInstance : public MeshShaderMiscInstance
635 {
636 public:
SinglePointInstance(Context & context,const MiscTestParams * params)637     SinglePointInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
638     {
639     }
640 
641     void generateReferenceLevel() override;
642 };
643 
createInstance(Context & context) const644 TestInstance *SinglePointCase::createInstance(Context &context) const
645 {
646     return new SinglePointInstance(context, m_params.get());
647 }
648 
initPrograms(vk::SourceCollections & programCollection) const649 void SinglePointCase::initPrograms(vk::SourceCollections &programCollection) const
650 {
651     DE_ASSERT(!m_params->needsTaskShader());
652 
653     MeshShaderMiscCase::initPrograms(programCollection);
654 
655     std::ostringstream mesh;
656     mesh << "#version 450\n"
657          << "#extension GL_NV_mesh_shader : enable\n"
658          << "\n"
659          << "layout(local_size_x=1) in;\n"
660          << "layout(points) out;\n"
661          << "layout(max_vertices=256, max_primitives=256) out;\n"
662          << "\n"
663          << "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
664          << "\n"
665          << "void main ()\n"
666          << "{\n"
667          << "    gl_PrimitiveCountNV = 1u;\n"
668          << "    pointColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
669          << "    gl_MeshVerticesNV[0].gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"
670          << "    gl_MeshVerticesNV[0].gl_PointSize = 1.0f;\n"
671          << "    gl_PrimitiveIndicesNV[0] = 0;\n"
672          << "}\n";
673     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
674 }
675 
generateReferenceLevel()676 void SinglePointInstance::generateReferenceLevel()
677 {
678     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
679 
680     const auto halfWidth  = static_cast<int>(m_params->width / 2u);
681     const auto halfHeight = static_cast<int>(m_params->height / 2u);
682     const auto access     = m_referenceLevel->getAccess();
683 
684     access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
685 }
686 
687 // Verify drawing a single line.
688 class SingleLineCase : public MeshShaderMiscCase
689 {
690 public:
SingleLineCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)691     SingleLineCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
692         : MeshShaderMiscCase(testCtx, name, std::move(params))
693     {
694     }
695 
696     void initPrograms(vk::SourceCollections &programCollection) const override;
697     TestInstance *createInstance(Context &context) const override;
698 };
699 
700 class SingleLineInstance : public MeshShaderMiscInstance
701 {
702 public:
SingleLineInstance(Context & context,const MiscTestParams * params)703     SingleLineInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
704     {
705     }
706 
707     void generateReferenceLevel() override;
708 };
709 
createInstance(Context & context) const710 TestInstance *SingleLineCase::createInstance(Context &context) const
711 {
712     return new SingleLineInstance(context, m_params.get());
713 }
714 
initPrograms(vk::SourceCollections & programCollection) const715 void SingleLineCase::initPrograms(vk::SourceCollections &programCollection) const
716 {
717     DE_ASSERT(!m_params->needsTaskShader());
718 
719     MeshShaderMiscCase::initPrograms(programCollection);
720 
721     std::ostringstream mesh;
722     mesh << "#version 450\n"
723          << "#extension GL_NV_mesh_shader : enable\n"
724          << "\n"
725          << "layout(local_size_x=1) in;\n"
726          << "layout(lines) out;\n"
727          << "layout(max_vertices=256, max_primitives=256) out;\n"
728          << "\n"
729          << "layout (location=0) out perprimitiveNV vec4 lineColor[];\n"
730          << "\n"
731          << "void main ()\n"
732          << "{\n"
733          << "    gl_PrimitiveCountNV = 1u;\n"
734          << "    lineColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
735          << "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0f, 0.0f, 0.0f, 1.0f);\n"
736          << "    gl_MeshVerticesNV[1].gl_Position = vec4( 1.0f, 0.0f, 0.0f, 1.0f);\n"
737          << "    gl_PrimitiveIndicesNV[0] = 0;\n"
738          << "    gl_PrimitiveIndicesNV[1] = 1;\n"
739          << "}\n";
740     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
741 }
742 
generateReferenceLevel()743 void SingleLineInstance::generateReferenceLevel()
744 {
745     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
746 
747     const auto iWidth     = static_cast<int>(m_params->width);
748     const auto halfHeight = static_cast<int>(m_params->height / 2u);
749     const auto access     = m_referenceLevel->getAccess();
750 
751     // Center row.
752     for (int x = 0; x < iWidth; ++x)
753         access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), x, halfHeight);
754 }
755 
756 // Verify drawing a single triangle.
757 class SingleTriangleCase : public MeshShaderMiscCase
758 {
759 public:
SingleTriangleCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)760     SingleTriangleCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
761         : MeshShaderMiscCase(testCtx, name, std::move(params))
762     {
763     }
764 
765     void initPrograms(vk::SourceCollections &programCollection) const override;
766     TestInstance *createInstance(Context &context) const override;
767 };
768 
769 class SingleTriangleInstance : public MeshShaderMiscInstance
770 {
771 public:
SingleTriangleInstance(Context & context,const MiscTestParams * params)772     SingleTriangleInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
773     {
774     }
775 
776     void generateReferenceLevel() override;
777 };
778 
createInstance(Context & context) const779 TestInstance *SingleTriangleCase::createInstance(Context &context) const
780 {
781     return new SingleTriangleInstance(context, m_params.get());
782 }
783 
initPrograms(vk::SourceCollections & programCollection) const784 void SingleTriangleCase::initPrograms(vk::SourceCollections &programCollection) const
785 {
786     DE_ASSERT(!m_params->needsTaskShader());
787 
788     MeshShaderMiscCase::initPrograms(programCollection);
789 
790     const float halfPixelX = 2.0f / static_cast<float>(m_params->width);
791     const float halfPixelY = 2.0f / static_cast<float>(m_params->height);
792 
793     std::ostringstream mesh;
794     mesh << "#version 450\n"
795          << "#extension GL_NV_mesh_shader : enable\n"
796          << "\n"
797          << "layout(local_size_x=1) in;\n"
798          << "layout(triangles) out;\n"
799          << "layout(max_vertices=256, max_primitives=256) out;\n"
800          << "\n"
801          << "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
802          << "\n"
803          << "void main ()\n"
804          << "{\n"
805          << "    gl_PrimitiveCountNV = 1u;\n"
806          << "    triangleColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
807          << "    gl_MeshVerticesNV[0].gl_Position = vec4(" << halfPixelY << ", " << -halfPixelX << ", 0.0f, 1.0f);\n"
808          << "    gl_MeshVerticesNV[1].gl_Position = vec4(" << halfPixelY << ", " << halfPixelX << ", 0.0f, 1.0f);\n"
809          << "    gl_MeshVerticesNV[2].gl_Position = vec4(" << -halfPixelY << ", 0.0f, 0.0f, 1.0f);\n"
810          << "    gl_PrimitiveIndicesNV[0] = 0;\n"
811          << "    gl_PrimitiveIndicesNV[1] = 1;\n"
812          << "    gl_PrimitiveIndicesNV[2] = 2;\n"
813          << "}\n";
814     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
815 }
816 
generateReferenceLevel()817 void SingleTriangleInstance::generateReferenceLevel()
818 {
819     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
820 
821     const auto halfWidth  = static_cast<int>(m_params->width / 2u);
822     const auto halfHeight = static_cast<int>(m_params->height / 2u);
823     const auto access     = m_referenceLevel->getAccess();
824 
825     // Single pixel in the center.
826     access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
827 }
828 
829 // Verify drawing the maximum number of points.
830 class MaxPointsCase : public MeshShaderMiscCase
831 {
832 public:
MaxPointsCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)833     MaxPointsCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
834         : MeshShaderMiscCase(testCtx, name, std::move(params))
835     {
836     }
837 
838     void initPrograms(vk::SourceCollections &programCollection) const override;
839     TestInstance *createInstance(Context &context) const override;
840 };
841 
842 class MaxPointsInstance : public MeshShaderMiscInstance
843 {
844 public:
MaxPointsInstance(Context & context,const MiscTestParams * params)845     MaxPointsInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
846     {
847     }
848 
849     void generateReferenceLevel() override;
850 };
851 
createInstance(Context & context) const852 TestInstance *MaxPointsCase::createInstance(Context &context) const
853 {
854     return new MaxPointsInstance(context, m_params.get());
855 }
856 
initPrograms(vk::SourceCollections & programCollection) const857 void MaxPointsCase::initPrograms(vk::SourceCollections &programCollection) const
858 {
859     DE_ASSERT(!m_params->needsTaskShader());
860 
861     MeshShaderMiscCase::initPrograms(programCollection);
862 
863     // Fill a 16x16 image with 256 points. Each of the 32 local invocations will handle a segment of 8 pixels. Two segments per row.
864     DE_ASSERT(m_params->width == 16u && m_params->height == 16u);
865 
866     std::ostringstream mesh;
867     mesh << "#version 450\n"
868          << "#extension GL_NV_mesh_shader : enable\n"
869          << "\n"
870          << "layout(local_size_x=32) in;\n"
871          << "layout(points) out;\n"
872          << "layout(max_vertices=256, max_primitives=256) out;\n"
873          << "\n"
874          << "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
875          << "\n"
876          << "void main ()\n"
877          << "{\n"
878          << "    gl_PrimitiveCountNV = 256u;\n"
879          << "    uint firstPixel = 8u * gl_LocalInvocationID.x;\n"
880          << "    uint row = firstPixel / 16u;\n"
881          << "    uint col = firstPixel % 16u;\n"
882          << "    float pixSize = 2.0f / 16.0f;\n"
883          << "    float yCoord = pixSize * (float(row) + 0.5f) - 1.0f;\n"
884          << "    float baseXCoord = pixSize * (float(col) + 0.5f) - 1.0f;\n"
885          << "    for (uint i = 0; i < 8u; i++) {\n"
886          << "        float xCoord = baseXCoord + pixSize * float(i);\n"
887          << "        uint pixId = firstPixel + i;\n"
888          << "        gl_MeshVerticesNV[pixId].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
889          << "        gl_MeshVerticesNV[pixId].gl_PointSize = 1.0f;\n"
890          << "        gl_PrimitiveIndicesNV[pixId] = pixId;\n"
891          << "        pointColor[pixId] = vec4(((xCoord + 1.0f) / 2.0f), ((yCoord + 1.0f) / 2.0f), 0.0f, 1.0f);\n"
892          << "    }\n"
893          << "}\n";
894     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
895 }
896 
generateReferenceLevel()897 void MaxPointsInstance::generateReferenceLevel()
898 {
899     const auto format    = getOutputFormat();
900     const auto tcuFormat = mapVkFormat(format);
901 
902     const auto iWidth  = static_cast<int>(m_params->width);
903     const auto iHeight = static_cast<int>(m_params->height);
904     const auto fWidth  = static_cast<float>(m_params->width);
905     const auto fHeight = static_cast<float>(m_params->height);
906 
907     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
908 
909     const auto access = m_referenceLevel->getAccess();
910 
911     // Fill with gradient like the shader does.
912     for (int y = 0; y < iHeight; ++y)
913         for (int x = 0; x < iWidth; ++x)
914         {
915             const tcu::Vec4 color(((static_cast<float>(x) + 0.5f) / fWidth), ((static_cast<float>(y) + 0.5f) / fHeight),
916                                   0.0f, 1.0f);
917             access.setPixel(color, x, y);
918         }
919 }
920 
921 // Verify drawing the maximum number of lines.
922 class MaxLinesCase : public MeshShaderMiscCase
923 {
924 public:
MaxLinesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)925     MaxLinesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
926         : MeshShaderMiscCase(testCtx, name, std::move(params))
927     {
928     }
929 
930     void initPrograms(vk::SourceCollections &programCollection) const override;
931     TestInstance *createInstance(Context &context) const override;
932 };
933 
934 class MaxLinesInstance : public MeshShaderMiscInstance
935 {
936 public:
MaxLinesInstance(Context & context,const MiscTestParams * params)937     MaxLinesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
938     {
939     }
940 
941     void generateReferenceLevel() override;
942 };
943 
createInstance(Context & context) const944 TestInstance *MaxLinesCase::createInstance(Context &context) const
945 {
946     return new MaxLinesInstance(context, m_params.get());
947 }
948 
initPrograms(vk::SourceCollections & programCollection) const949 void MaxLinesCase::initPrograms(vk::SourceCollections &programCollection) const
950 {
951     DE_ASSERT(!m_params->needsTaskShader());
952 
953     MeshShaderMiscCase::initPrograms(programCollection);
954 
955     // Fill a 1x1020 image with 255 lines, each line being 4 pixels tall. Each invocation will generate ~8 lines.
956     DE_ASSERT(m_params->width == 1u && m_params->height == 1020u);
957 
958     std::ostringstream mesh;
959     mesh << "#version 450\n"
960          << "#extension GL_NV_mesh_shader : enable\n"
961          << "\n"
962          << "layout(local_size_x=32) in;\n"
963          << "layout(lines) out;\n"
964          << "layout(max_vertices=256, max_primitives=255) out;\n"
965          << "\n"
966          << "layout (location=0) out perprimitiveNV vec4 lineColor[];\n"
967          << "\n"
968          << "void main ()\n"
969          << "{\n"
970          << "    gl_PrimitiveCountNV = 255u;\n"
971          << "    uint firstLine = 8u * gl_LocalInvocationID.x;\n"
972          << "    for (uint i = 0u; i < 8u; i++) {\n"
973          << "        uint lineId = firstLine + i;\n"
974          << "        uint topPixel = 4u * lineId;\n"
975          << "        uint bottomPixel = 3u + topPixel;\n"
976          << "        if (bottomPixel < 1020u) {\n"
977          << "            float bottomCoord = ((float(bottomPixel) + 1.0f) / 1020.0) * 2.0 - 1.0;\n"
978          << "            gl_MeshVerticesNV[lineId + 1u].gl_Position = vec4(0.0, bottomCoord, 0.0f, 1.0f);\n"
979          << "            gl_PrimitiveIndicesNV[lineId * 2u] = lineId;\n"
980          << "            gl_PrimitiveIndicesNV[lineId * 2u + 1u] = lineId + 1u;\n"
981          << "            lineColor[lineId] = vec4(0.0f, 1.0f, float(lineId) / 255.0f, 1.0f);\n"
982          << "        } else {\n"
983          << "            // The last iteration of the last invocation emits the first point\n"
984          << "            gl_MeshVerticesNV[0].gl_Position = vec4(0.0, -1.0, 0.0f, 1.0f);\n"
985          << "        }\n"
986          << "    }\n"
987          << "}\n";
988     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
989 }
990 
generateReferenceLevel()991 void MaxLinesInstance::generateReferenceLevel()
992 {
993     const auto format    = getOutputFormat();
994     const auto tcuFormat = mapVkFormat(format);
995 
996     const auto iWidth  = static_cast<int>(m_params->width);
997     const auto iHeight = static_cast<int>(m_params->height);
998 
999     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
1000 
1001     const auto access = m_referenceLevel->getAccess();
1002 
1003     // Fill lines, 4 pixels per line.
1004     const uint32_t kNumLines   = 255u;
1005     const uint32_t kLineHeight = 4u;
1006 
1007     for (uint32_t i = 0u; i < kNumLines; ++i)
1008     {
1009         const tcu::Vec4 color(0.0f, 1.0f, static_cast<float>(i) / static_cast<float>(kNumLines), 1.0f);
1010         for (uint32_t j = 0u; j < kLineHeight; ++j)
1011             access.setPixel(color, 0, i * kLineHeight + j);
1012     }
1013 }
1014 
1015 // Verify drawing the maximum number of triangles.
1016 class MaxTrianglesCase : public MeshShaderMiscCase
1017 {
1018 public:
MaxTrianglesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1019     MaxTrianglesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1020         : MeshShaderMiscCase(testCtx, name, std::move(params))
1021     {
1022     }
1023 
1024     void initPrograms(vk::SourceCollections &programCollection) const override;
1025     TestInstance *createInstance(Context &context) const override;
1026 };
1027 
1028 class MaxTrianglesInstance : public MeshShaderMiscInstance
1029 {
1030 public:
MaxTrianglesInstance(Context & context,const MiscTestParams * params)1031     MaxTrianglesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1032     {
1033     }
1034 
1035     void generateReferenceLevel() override;
1036 };
1037 
createInstance(Context & context) const1038 TestInstance *MaxTrianglesCase::createInstance(Context &context) const
1039 {
1040     return new MaxTrianglesInstance(context, m_params.get());
1041 }
1042 
initPrograms(vk::SourceCollections & programCollection) const1043 void MaxTrianglesCase::initPrograms(vk::SourceCollections &programCollection) const
1044 {
1045     DE_ASSERT(!m_params->needsTaskShader());
1046 
1047     MeshShaderMiscCase::initPrograms(programCollection);
1048 
1049     // Fill a sufficiently large image with solid color. Generate a quarter of a circle with the center in the top left corner,
1050     // using a triangle fan that advances from top to bottom. Each invocation will generate ~8 triangles.
1051     std::ostringstream mesh;
1052     mesh << "#version 450\n"
1053          << "#extension GL_NV_mesh_shader : enable\n"
1054          << "\n"
1055          << "layout(local_size_x=32) in;\n"
1056          << "layout(triangles) out;\n"
1057          << "layout(max_vertices=256, max_primitives=254) out;\n"
1058          << "\n"
1059          << "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
1060          << "\n"
1061          << "const float PI_2 = 1.57079632679489661923;\n"
1062          << "const float RADIUS = 4.5;\n"
1063          << "\n"
1064          << "void main ()\n"
1065          << "{\n"
1066          << "    gl_PrimitiveCountNV = 254u;\n"
1067          << "    uint firstTriangle = 8u * gl_LocalInvocationID.x;\n"
1068          << "    for (uint i = 0u; i < 8u; i++) {\n"
1069          << "        uint triangleId = firstTriangle + i;\n"
1070          << "        if (triangleId < 254u) {\n"
1071          << "            uint vertexId = triangleId + 2u;\n"
1072          << "            float angleProportion = float(vertexId - 1u) / 254.0f;\n"
1073          << "            float angle = PI_2 * angleProportion;\n"
1074          << "            float xCoord = cos(angle) * RADIUS - 1.0;\n"
1075          << "            float yCoord = sin(angle) * RADIUS - 1.0;\n"
1076          << "            gl_MeshVerticesNV[vertexId].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1077          << "            gl_PrimitiveIndicesNV[triangleId * 3u + 0u] = 0u;\n"
1078          << "            gl_PrimitiveIndicesNV[triangleId * 3u + 1u] = triangleId + 1u;\n"
1079          << "            gl_PrimitiveIndicesNV[triangleId * 3u + 2u] = triangleId + 2u;\n"
1080          << "            triangleColor[triangleId] = vec4(0.0f, 0.0f, 1.0f, 1.0f);\n"
1081          << "        } else {\n"
1082          << "            // The last iterations of the last invocation emit the first two vertices\n"
1083          << "            uint vertexId = triangleId - 254u;\n"
1084          << "            if (vertexId == 0u) {\n"
1085          << "                gl_MeshVerticesNV[0u].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
1086          << "            } else {\n"
1087          << "                gl_MeshVerticesNV[1u].gl_Position = vec4(RADIUS, -1.0, 0.0, 1.0);\n"
1088          << "            }\n"
1089          << "        }\n"
1090          << "    }\n"
1091          << "}\n";
1092     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1093 }
1094 
generateReferenceLevel()1095 void MaxTrianglesInstance::generateReferenceLevel()
1096 {
1097     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1098 }
1099 
1100 // Large work groups with many threads.
1101 class LargeWorkGroupCase : public MeshShaderMiscCase
1102 {
1103 public:
LargeWorkGroupCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1104     LargeWorkGroupCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1105         : MeshShaderMiscCase(testCtx, name, std::move(params))
1106     {
1107     }
1108 
1109     void initPrograms(vk::SourceCollections &programCollection) const override;
1110     TestInstance *createInstance(Context &context) const override;
1111 
1112     static constexpr uint32_t kLocalInvocations = 32u;
1113 };
1114 
1115 class LargeWorkGroupInstance : public MeshShaderMiscInstance
1116 {
1117 public:
LargeWorkGroupInstance(Context & context,const MiscTestParams * params)1118     LargeWorkGroupInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1119     {
1120     }
1121 
1122     void generateReferenceLevel() override;
1123 };
1124 
createInstance(Context & context) const1125 TestInstance *LargeWorkGroupCase::createInstance(Context &context) const
1126 {
1127     return new LargeWorkGroupInstance(context, m_params.get());
1128 }
1129 
generateReferenceLevel()1130 void LargeWorkGroupInstance::generateReferenceLevel()
1131 {
1132     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1133 }
1134 
initPrograms(vk::SourceCollections & programCollection) const1135 void LargeWorkGroupCase::initPrograms(vk::SourceCollections &programCollection) const
1136 {
1137     const auto useTaskShader  = m_params->needsTaskShader();
1138     const auto taskMultiplier = (useTaskShader ? m_params->taskCount.get() : 1u);
1139 
1140     // Add the frag shader.
1141     MeshShaderMiscCase::initPrograms(programCollection);
1142 
1143     std::ostringstream taskData;
1144     taskData << "taskNV TaskData {\n"
1145              << "    uint parentTask[" << kLocalInvocations << "];\n"
1146              << "} td;\n";
1147     const auto taskDataStr = taskData.str();
1148 
1149     if (useTaskShader)
1150     {
1151         std::ostringstream task;
1152         task << "#version 450\n"
1153              << "#extension GL_NV_mesh_shader : enable\n"
1154              << "\n"
1155              << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1156              << "\n"
1157              << "out " << taskDataStr << "\n"
1158              << "void main () {\n"
1159              << "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
1160              << "    td.parentTask[gl_LocalInvocationID.x] = gl_WorkGroupID.x;\n"
1161              << "}\n";
1162         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1163     }
1164 
1165     // Needed for the code below to work.
1166     DE_ASSERT(m_params->width * m_params->height == taskMultiplier * m_params->meshCount * kLocalInvocations);
1167     DE_UNREF(taskMultiplier); // For release builds.
1168 
1169     // Emit one point per framebuffer pixel. The number of jobs (kLocalInvocations in each mesh shader work group, multiplied by the
1170     // number of mesh work groups emitted by each task work group) must be the same as the total framebuffer size. Calculate a job
1171     // ID corresponding to the current mesh shader invocation, and assign a pixel position to it. Draw a point at that position.
1172     std::ostringstream mesh;
1173     mesh << "#version 450\n"
1174          << "#extension GL_NV_mesh_shader : enable\n"
1175          << "\n"
1176          << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1177          << "layout (points) out;\n"
1178          << "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << kLocalInvocations << ") out;\n"
1179          << "\n"
1180          << (useTaskShader ? "in " + taskDataStr : "") << "\n"
1181          << "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
1182          << "\n"
1183          << "void main () {\n";
1184 
1185     if (useTaskShader)
1186     {
1187         mesh << "    uint parentTask = td.parentTask[0];\n"
1188              << "    if (td.parentTask[gl_LocalInvocationID.x] != parentTask) {\n"
1189              << "        return;\n"
1190              << "    }\n";
1191     }
1192     else
1193     {
1194         mesh << "    uint parentTask = 0;\n";
1195     }
1196 
1197     mesh << "    gl_PrimitiveCountNV = " << kLocalInvocations << ";\n"
1198          << "    uint jobId = ((parentTask * " << m_params->meshCount << ") + gl_WorkGroupID.x) * " << kLocalInvocations
1199          << " + gl_LocalInvocationID.x;\n"
1200          << "    uint row = jobId / " << m_params->width << ";\n"
1201          << "    uint col = jobId % " << m_params->width << ";\n"
1202          << "    float yCoord = (float(row + 0.5) / " << m_params->height << ".0) * 2.0 - 1.0;\n"
1203          << "    float xCoord = (float(col + 0.5) / " << m_params->width << ".0) * 2.0 - 1.0;\n"
1204          << "    gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1205          << "    gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_PointSize = 1.0;\n"
1206          << "    gl_PrimitiveIndicesNV[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n"
1207          << "    pointColor[gl_LocalInvocationID.x] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1208          << "}\n";
1209     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1210 }
1211 
// Output primitive kinds used by the tests that end up generating no primitives.
enum class PrimitiveType
{
    POINTS    = 0,
    LINES     = 1,
    TRIANGLES = 2
};
1219 
primitiveTypeName(PrimitiveType primitiveType)1220 std::string primitiveTypeName(PrimitiveType primitiveType)
1221 {
1222     std::string primitiveName;
1223 
1224     switch (primitiveType)
1225     {
1226     case PrimitiveType::POINTS:
1227         primitiveName = "points";
1228         break;
1229     case PrimitiveType::LINES:
1230         primitiveName = "lines";
1231         break;
1232     case PrimitiveType::TRIANGLES:
1233         primitiveName = "triangles";
1234         break;
1235     default:
1236         DE_ASSERT(false);
1237         break;
1238     }
1239 
1240     return primitiveName;
1241 }
1242 
1243 struct NoPrimitivesParams : public MiscTestParams
1244 {
NoPrimitivesParamsvkt::MeshShader::__anon5941fe3f0111::NoPrimitivesParams1245     NoPrimitivesParams(const tcu::Maybe<uint32_t> &taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_,
1246                        PrimitiveType primitiveType_)
1247         : MiscTestParams(taskCount_, meshCount_, width_, height_)
1248         , primitiveType(primitiveType_)
1249     {
1250     }
1251 
1252     PrimitiveType primitiveType;
1253 };
1254 
1255 class NoPrimitivesCase : public MeshShaderMiscCase
1256 {
1257 public:
NoPrimitivesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1258     NoPrimitivesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1259         : MeshShaderMiscCase(testCtx, name, std::move(params))
1260     {
1261     }
1262 
1263     void initPrograms(vk::SourceCollections &programCollection) const override;
1264     TestInstance *createInstance(Context &context) const override;
1265 };
1266 
1267 class NoPrimitivesInstance : public MeshShaderMiscInstance
1268 {
1269 public:
NoPrimitivesInstance(Context & context,const MiscTestParams * params)1270     NoPrimitivesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1271     {
1272     }
1273 
1274     void generateReferenceLevel() override;
1275 };
1276 
generateReferenceLevel()1277 void NoPrimitivesInstance::generateReferenceLevel()
1278 {
1279     // No primitives: clear color.
1280     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
1281 }
1282 
createInstance(Context & context) const1283 TestInstance *NoPrimitivesCase::createInstance(Context &context) const
1284 {
1285     return new NoPrimitivesInstance(context, m_params.get());
1286 }
1287 
initPrograms(vk::SourceCollections & programCollection) const1288 void NoPrimitivesCase::initPrograms(vk::SourceCollections &programCollection) const
1289 {
1290     const auto params = dynamic_cast<NoPrimitivesParams *>(m_params.get());
1291 
1292     DE_ASSERT(params);
1293     DE_ASSERT(!params->needsTaskShader());
1294 
1295     const auto primitiveName = primitiveTypeName(params->primitiveType);
1296 
1297     std::ostringstream mesh;
1298     mesh << "#version 450\n"
1299          << "#extension GL_NV_mesh_shader : enable\n"
1300          << "\n"
1301          << "layout (local_size_x=32) in;\n"
1302          << "layout (" << primitiveName << ") out;\n"
1303          << "layout (max_vertices=256, max_primitives=256) out;\n"
1304          << "\n"
1305          << "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1306          << "\n"
1307          << "void main () {\n"
1308          << "    gl_PrimitiveCountNV = 0u;\n"
1309          << "}\n";
1310 
1311     MeshShaderMiscCase::initPrograms(programCollection);
1312     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1313 }
1314 
1315 class NoPrimitivesExtraWritesCase : public NoPrimitivesCase
1316 {
1317 public:
NoPrimitivesExtraWritesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1318     NoPrimitivesExtraWritesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1319         : NoPrimitivesCase(testCtx, name, std::move(params))
1320     {
1321     }
1322 
1323     void initPrograms(vk::SourceCollections &programCollection) const override;
1324 
1325     static constexpr uint32_t kLocalInvocations = 32u;
1326 };
1327 
initPrograms(vk::SourceCollections & programCollection) const1328 void NoPrimitivesExtraWritesCase::initPrograms(vk::SourceCollections &programCollection) const
1329 {
1330     const auto params = dynamic_cast<NoPrimitivesParams *>(m_params.get());
1331 
1332     DE_ASSERT(params);
1333     DE_ASSERT(m_params->needsTaskShader());
1334 
1335     std::ostringstream taskData;
1336     taskData << "taskNV TaskData {\n"
1337              << "    uint localInvocations[" << kLocalInvocations << "];\n"
1338              << "} td;\n";
1339     const auto taskDataStr = taskData.str();
1340 
1341     std::ostringstream task;
1342     task << "#version 450\n"
1343          << "#extension GL_NV_mesh_shader : enable\n"
1344          << "\n"
1345          << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1346          << "\n"
1347          << "out " << taskDataStr << "\n"
1348          << "void main () {\n"
1349          << "    gl_TaskCountNV = " << params->meshCount << ";\n"
1350          << "    td.localInvocations[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n"
1351          << "}\n";
1352     programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1353 
1354     const auto primitiveName = primitiveTypeName(params->primitiveType);
1355 
1356     // Otherwise the shader would be illegal.
1357     DE_ASSERT(kLocalInvocations > 2u);
1358 
1359     uint32_t maxPrimitives = 0u;
1360     switch (params->primitiveType)
1361     {
1362     case PrimitiveType::POINTS:
1363         maxPrimitives = kLocalInvocations - 0u;
1364         break;
1365     case PrimitiveType::LINES:
1366         maxPrimitives = kLocalInvocations - 1u;
1367         break;
1368     case PrimitiveType::TRIANGLES:
1369         maxPrimitives = kLocalInvocations - 2u;
1370         break;
1371     default:
1372         DE_ASSERT(false);
1373         break;
1374     }
1375 
1376     const std::string pointSizeDecl = ((params->primitiveType == PrimitiveType::POINTS) ?
1377                                            "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_PointSize = 1.0;\n" :
1378                                            "");
1379 
1380     std::ostringstream mesh;
1381     mesh << "#version 450\n"
1382          << "#extension GL_NV_mesh_shader : enable\n"
1383          << "\n"
1384          << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1385          << "layout (" << primitiveName << ") out;\n"
1386          << "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << maxPrimitives << ") out;\n"
1387          << "\n"
1388          << "in " << taskDataStr << "\n"
1389          << "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1390          << "\n"
1391          << "shared uint sumOfIds;\n"
1392          << "\n"
1393          << "const float PI_2 = 1.57079632679489661923;\n"
1394          << "const float RADIUS = 1.0f;\n"
1395          << "\n"
1396          << "void main ()\n"
1397          << "{\n"
1398          << "    sumOfIds = 0u;\n"
1399          << "    memoryBarrierShared();\n"
1400          << "    barrier();\n"
1401          << "    atomicAdd(sumOfIds, td.localInvocations[gl_LocalInvocationID.x]);\n"
1402          << "    memoryBarrierShared();\n"
1403          << "    barrier();\n"
1404          << "    // This should dynamically give 0\n"
1405          << "    gl_PrimitiveCountNV = sumOfIds - (" << kLocalInvocations * (kLocalInvocations - 1u) / 2u << ");\n"
1406          << "\n"
1407          << "    // Emit points and primitives to the arrays in any case\n"
1408          << "    if (gl_LocalInvocationID.x > 0u) {\n"
1409          << "        float proportion = (float(gl_LocalInvocationID.x - 1u) + 0.5f) / float(" << kLocalInvocations
1410          << " - 1u);\n"
1411          << "        float angle = PI_2 * proportion;\n"
1412          << "        float xCoord = cos(angle) * RADIUS - 1.0;\n"
1413          << "        float yCoord = sin(angle) * RADIUS - 1.0;\n"
1414          << "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1415          << pointSizeDecl << "    } else {\n"
1416          << "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1417          << pointSizeDecl << "    }\n"
1418          << "    uint primitiveId = max(gl_LocalInvocationID.x, " << (maxPrimitives - 1u) << ");\n"
1419          << "    primitiveColor[primitiveId] = vec4(0.0, 0.0, 1.0, 1.0);\n";
1420 
1421     if (params->primitiveType == PrimitiveType::POINTS)
1422     {
1423         mesh << "    gl_PrimitiveIndicesNV[primitiveId] = primitiveId;\n";
1424     }
1425     else if (params->primitiveType == PrimitiveType::LINES)
1426     {
1427         mesh << "    gl_PrimitiveIndicesNV[primitiveId * 2u + 0u] = primitiveId + 0u;\n"
1428              << "    gl_PrimitiveIndicesNV[primitiveId * 2u + 1u] = primitiveId + 1u;\n";
1429     }
1430     else if (params->primitiveType == PrimitiveType::TRIANGLES)
1431     {
1432         mesh << "    gl_PrimitiveIndicesNV[primitiveId * 3u + 0u] = 0u;\n"
1433              << "    gl_PrimitiveIndicesNV[primitiveId * 3u + 1u] = primitiveId + 1u;\n"
1434              << "    gl_PrimitiveIndicesNV[primitiveId * 3u + 2u] = primitiveId + 3u;\n";
1435     }
1436     else
1437         DE_ASSERT(false);
1438 
1439     mesh << "}\n";
1440 
1441     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1442 
1443     MeshShaderMiscCase::initPrograms(programCollection);
1444 }
1445 
1446 // Case testing barrier().
1447 class SimpleBarrierCase : public MeshShaderMiscCase
1448 {
1449 public:
SimpleBarrierCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1450     SimpleBarrierCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1451         : MeshShaderMiscCase(testCtx, name, std::move(params))
1452     {
1453     }
1454 
1455     void initPrograms(vk::SourceCollections &programCollection) const override;
1456     TestInstance *createInstance(Context &context) const override;
1457 
1458     static constexpr uint32_t kLocalInvocations = 32u;
1459 };
1460 
1461 class SimpleBarrierInstance : public MeshShaderMiscInstance
1462 {
1463 public:
SimpleBarrierInstance(Context & context,const MiscTestParams * params)1464     SimpleBarrierInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1465     {
1466     }
1467 
1468     void generateReferenceLevel() override;
1469 };
1470 
createInstance(Context & context) const1471 TestInstance *SimpleBarrierCase::createInstance(Context &context) const
1472 {
1473     return new SimpleBarrierInstance(context, m_params.get());
1474 }
1475 
generateReferenceLevel()1476 void SimpleBarrierInstance::generateReferenceLevel()
1477 {
1478     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1479 }
1480 
initPrograms(vk::SourceCollections & programCollection) const1481 void SimpleBarrierCase::initPrograms(vk::SourceCollections &programCollection) const
1482 {
1483     // Generate frag shader.
1484     MeshShaderMiscCase::initPrograms(programCollection);
1485 
1486     DE_ASSERT(m_params->meshCount == 1u);
1487     DE_ASSERT(m_params->width == 1u && m_params->height == 1u);
1488 
1489     std::ostringstream meshPrimData;
1490     meshPrimData << "gl_PrimitiveCountNV = 1u;\n"
1491                  << "gl_MeshVerticesNV[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1492                  << "gl_MeshVerticesNV[0].gl_PointSize = 1.0;\n"
1493                  << "primitiveColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1494                  << "gl_PrimitiveIndicesNV[0] = 0;\n";
1495     const std::string meshPrimStr = meshPrimData.str();
1496 
1497     const std::string taskOK   = "gl_TaskCountNV = 1u;\n";
1498     const std::string taskFAIL = "gl_TaskCountNV = 0u;\n";
1499 
1500     const std::string meshOK   = meshPrimStr;
1501     const std::string meshFAIL = "gl_PrimitiveCountNV = 0u;\n";
1502 
1503     const std::string okStatement   = (m_params->needsTaskShader() ? taskOK : meshOK);
1504     const std::string failStatement = (m_params->needsTaskShader() ? taskFAIL : meshFAIL);
1505 
1506     const std::string sharedDecl = "shared uint counter;\n\n";
1507     std::ostringstream verification;
1508     verification << "counter = 0;\n"
1509                  << "memoryBarrierShared();\n"
1510                  << "barrier();\n"
1511                  << "atomicAdd(counter, 1u);\n"
1512                  << "memoryBarrierShared();\n"
1513                  << "barrier();\n"
1514                  << "if (gl_LocalInvocationID.x == 0u) {\n"
1515                  << "    if (counter == " << kLocalInvocations << ") {\n"
1516                  << "\n"
1517                  << okStatement << "\n"
1518                  << "    } else {\n"
1519                  << "\n"
1520                  << failStatement << "\n"
1521                  << "    }\n"
1522                  << "}\n";
1523 
1524     // The mesh shader is very similar in both cases, so we use a template.
1525     std::ostringstream meshTemplateStr;
1526     meshTemplateStr << "#version 450\n"
1527                     << "#extension GL_NV_mesh_shader : enable\n"
1528                     << "\n"
1529                     << "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1530                     << "layout (points) out;\n"
1531                     << "layout (max_vertices=1, max_primitives=1) out;\n"
1532                     << "\n"
1533                     << "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1534                     << "\n"
1535                     << "${GLOBALS:opt}"
1536                     << "void main ()\n"
1537                     << "{\n"
1538                     << "${BODY}"
1539                     << "}\n";
1540     const tcu::StringTemplate meshTemplate(meshTemplateStr.str());
1541 
1542     if (m_params->needsTaskShader())
1543     {
1544         std::ostringstream task;
1545         task << "#version 450\n"
1546              << "#extension GL_NV_mesh_shader : enable\n"
1547              << "\n"
1548              << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1549              << "\n"
1550              << sharedDecl << "void main ()\n"
1551              << "{\n"
1552              << verification.str() << "}\n";
1553 
1554         std::map<std::string, std::string> replacements;
1555         replacements["LOCAL_SIZE"] = "1";
1556         replacements["BODY"]       = meshPrimStr;
1557 
1558         const auto meshStr = meshTemplate.specialize(replacements);
1559 
1560         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1561         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1562     }
1563     else
1564     {
1565         std::map<std::string, std::string> replacements;
1566         replacements["LOCAL_SIZE"] = std::to_string(kLocalInvocations);
1567         replacements["BODY"]       = verification.str();
1568         replacements["GLOBALS"]    = sharedDecl;
1569 
1570         const auto meshStr = meshTemplate.specialize(replacements);
1571 
1572         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1573     }
1574 }
1575 
// Case testing memoryBarrierShared() and groupMemoryBarrier().
// Identifies which of the two GLSL memory barrier functions a test variant exercises.
enum class MemoryBarrierType
{
    SHARED = 0, // memoryBarrierShared()
    GROUP  = 1, // groupMemoryBarrier()
};
1582 
1583 struct MemoryBarrierParams : public MiscTestParams
1584 {
MemoryBarrierParamsvkt::MeshShader::__anon5941fe3f0111::MemoryBarrierParams1585     MemoryBarrierParams(const tcu::Maybe<uint32_t> &taskCount_, uint32_t meshCount_, uint32_t width_, uint32_t height_,
1586                         MemoryBarrierType memBarrierType_)
1587         : MiscTestParams(taskCount_, meshCount_, width_, height_)
1588         , memBarrierType(memBarrierType_)
1589     {
1590     }
1591 
1592     MemoryBarrierType memBarrierType;
1593 
glslFuncvkt::MeshShader::__anon5941fe3f0111::MemoryBarrierParams1594     std::string glslFunc() const
1595     {
1596         std::string funcName;
1597 
1598         switch (memBarrierType)
1599         {
1600         case MemoryBarrierType::SHARED:
1601             funcName = "memoryBarrierShared";
1602             break;
1603         case MemoryBarrierType::GROUP:
1604             funcName = "groupMemoryBarrier";
1605             break;
1606         default:
1607             DE_ASSERT(false);
1608             break;
1609         }
1610 
1611         return funcName;
1612     }
1613 };
1614 
1615 class MemoryBarrierCase : public MeshShaderMiscCase
1616 {
1617 public:
MemoryBarrierCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1618     MemoryBarrierCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1619         : MeshShaderMiscCase(testCtx, name, std::move(params))
1620     {
1621     }
1622 
1623     void initPrograms(vk::SourceCollections &programCollection) const override;
1624     TestInstance *createInstance(Context &context) const override;
1625 
1626     static constexpr uint32_t kLocalInvocations = 2u;
1627 };
1628 
1629 class MemoryBarrierInstance : public MeshShaderMiscInstance
1630 {
1631 public:
MemoryBarrierInstance(Context & context,const MiscTestParams * params)1632     MemoryBarrierInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1633     {
1634     }
1635 
1636     void generateReferenceLevel() override;
1637     bool verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const override;
1638 
1639 protected:
1640     // Allow two possible outcomes.
1641     std::unique_ptr<tcu::TextureLevel> m_referenceLevel2;
1642 };
1643 
createInstance(Context & context) const1644 TestInstance *MemoryBarrierCase::createInstance(Context &context) const
1645 {
1646     return new MemoryBarrierInstance(context, m_params.get());
1647 }
1648 
generateReferenceLevel()1649 void MemoryBarrierInstance::generateReferenceLevel()
1650 {
1651     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1652     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f), m_referenceLevel2);
1653 }
1654 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const1655 bool MemoryBarrierInstance::verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const
1656 {
1657     // Any of the two results is considered valid.
1658     // Clarify what we are checking in the logs; otherwise, they could be confusing.
1659     auto &log                                     = m_context.getTestContext().getLog();
1660     const std::vector<tcu::TextureLevel *> levels = {m_referenceLevel.get(), m_referenceLevel2.get()};
1661 
1662     bool good = false;
1663     for (size_t i = 0; i < levels.size(); ++i)
1664     {
1665         log << tcu::TestLog::Message << "Comparing result with reference " << i << "..." << tcu::TestLog::EndMessage;
1666         const auto success = MeshShaderMiscInstance::verifyResult(resultAccess, *levels[i]);
1667         if (success)
1668         {
1669             log << tcu::TestLog::Message << "Match! The test has passed" << tcu::TestLog::EndMessage;
1670             good = true;
1671             break;
1672         }
1673     }
1674 
1675     return good;
1676 }
1677 
initPrograms(vk::SourceCollections & programCollection) const1678 void MemoryBarrierCase::initPrograms(vk::SourceCollections &programCollection) const
1679 {
1680     const auto params = dynamic_cast<MemoryBarrierParams *>(m_params.get());
1681     DE_ASSERT(params);
1682 
1683     // Generate frag shader.
1684     MeshShaderMiscCase::initPrograms(programCollection);
1685 
1686     DE_ASSERT(params->meshCount == 1u);
1687     DE_ASSERT(params->width == 1u && params->height == 1u);
1688 
1689     const bool taskShader = params->needsTaskShader();
1690 
1691     const std::string taskDataDecl = "taskNV TaskData { float blue; } td;\n\n";
1692     const std::string inTaskData   = "in " + taskDataDecl;
1693     const std::string outTaskData  = "out " + taskDataDecl;
1694     const auto barrierFunc         = params->glslFunc();
1695 
1696     std::ostringstream meshPrimData;
1697     meshPrimData << "gl_PrimitiveCountNV = 1u;\n"
1698                  << "gl_MeshVerticesNV[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1699                  << "gl_MeshVerticesNV[0].gl_PointSize = 1.0;\n"
1700                  << "primitiveColor[0] = vec4(0.0, 0.0, " << (taskShader ? "td.blue" : "float(iterations % 2u)")
1701                  << ", 1.0);\n"
1702                  << "gl_PrimitiveIndicesNV[0] = 0;\n";
1703     const std::string meshPrimStr = meshPrimData.str();
1704 
1705     const std::string taskAction = "gl_TaskCountNV = 1u;\ntd.blue = float(iterations % 2u);\n";
1706     const std::string meshAction = meshPrimStr;
1707     const std::string action     = (taskShader ? taskAction : meshAction);
1708 
1709     const std::string sharedDecl = "shared uint flags[2];\n\n";
1710     std::ostringstream verification;
1711     verification << "flags[gl_LocalInvocationID.x] = 0u;\n"
1712                  << "barrier();\n"
1713                  << "flags[gl_LocalInvocationID.x] = 1u;\n"
1714                  << barrierFunc << "();\n"
1715                  << "uint otherInvocation = 1u - gl_LocalInvocationID.x;\n"
1716                  << "uint iterations = 0u;\n"
1717                  << "while (flags[otherInvocation] != 1u) {\n"
1718                  << "    iterations++;\n"
1719                  << "}\n"
1720                  << "if (gl_LocalInvocationID.x == 0u) {\n"
1721                  << "\n"
1722                  << action << "\n"
1723                  << "}\n";
1724 
1725     // The mesh shader is very similar in both cases, so we use a template.
1726     std::ostringstream meshTemplateStr;
1727     meshTemplateStr << "#version 450\n"
1728                     << "#extension GL_NV_mesh_shader : enable\n"
1729                     << "\n"
1730                     << "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1731                     << "layout (points) out;\n"
1732                     << "layout (max_vertices=1, max_primitives=1) out;\n"
1733                     << "\n"
1734                     << "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
1735                     << "\n"
1736                     << "${GLOBALS}"
1737                     << "void main ()\n"
1738                     << "{\n"
1739                     << "${BODY}"
1740                     << "}\n";
1741     const tcu::StringTemplate meshTemplate(meshTemplateStr.str());
1742 
1743     if (params->needsTaskShader())
1744     {
1745         std::ostringstream task;
1746         task << "#version 450\n"
1747              << "#extension GL_NV_mesh_shader : enable\n"
1748              << "\n"
1749              << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1750              << "\n"
1751              << sharedDecl << outTaskData << "void main ()\n"
1752              << "{\n"
1753              << verification.str() << "}\n";
1754 
1755         std::map<std::string, std::string> replacements;
1756         replacements["LOCAL_SIZE"] = "1";
1757         replacements["BODY"]       = meshPrimStr;
1758         replacements["GLOBALS"]    = inTaskData;
1759 
1760         const auto meshStr = meshTemplate.specialize(replacements);
1761 
1762         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
1763         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1764     }
1765     else
1766     {
1767         std::map<std::string, std::string> replacements;
1768         replacements["LOCAL_SIZE"] = std::to_string(kLocalInvocations);
1769         replacements["BODY"]       = verification.str();
1770         replacements["GLOBALS"]    = sharedDecl;
1771 
1772         const auto meshStr = meshTemplate.specialize(replacements);
1773 
1774         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
1775     }
1776 }
1777 
1778 class CustomAttributesCase : public MeshShaderMiscCase
1779 {
1780 public:
CustomAttributesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1781     CustomAttributesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1782         : MeshShaderMiscCase(testCtx, name, std::move(params))
1783     {
1784     }
~CustomAttributesCase(void)1785     virtual ~CustomAttributesCase(void)
1786     {
1787     }
1788 
1789     TestInstance *createInstance(Context &context) const override;
1790     void checkSupport(Context &context) const override;
1791     void initPrograms(vk::SourceCollections &programCollection) const override;
1792 };
1793 
1794 class CustomAttributesInstance : public MeshShaderMiscInstance
1795 {
1796 public:
CustomAttributesInstance(Context & context,const MiscTestParams * params)1797     CustomAttributesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1798     {
1799     }
~CustomAttributesInstance(void)1800     virtual ~CustomAttributesInstance(void)
1801     {
1802     }
1803 
1804     void generateReferenceLevel() override;
1805     tcu::TestStatus iterate(void) override;
1806 };
1807 
createInstance(Context & context) const1808 TestInstance *CustomAttributesCase::createInstance(Context &context) const
1809 {
1810     return new CustomAttributesInstance(context, m_params.get());
1811 }
1812 
checkSupport(Context & context) const1813 void CustomAttributesCase::checkSupport(Context &context) const
1814 {
1815     MeshShaderMiscCase::checkSupport(context);
1816 
1817     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_VIEWPORT);
1818     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CLIP_DISTANCE);
1819 }
1820 
initPrograms(vk::SourceCollections & programCollection) const1821 void CustomAttributesCase::initPrograms(vk::SourceCollections &programCollection) const
1822 {
1823     std::ostringstream frag;
1824     frag << "#version 450\n"
1825          << "#extension GL_NV_mesh_shader : enable\n"
1826          << "\n"
1827          << "layout (location=0) in vec4 customAttribute1;\n"
1828          << "layout (location=1) in flat float customAttribute2;\n"
1829          << "layout (location=2) in flat int customAttribute3;\n"
1830          << "\n"
1831          << "layout (location=3) in perprimitiveNV flat uvec4 customAttribute4;\n"
1832          << "layout (location=4) in perprimitiveNV float customAttribute5;\n"
1833          << "\n"
1834          << "layout (location=0) out vec4 outColor;\n"
1835          << "\n"
1836          << "void main ()\n"
1837          << "{\n"
1838          << "    bool goodPrimitiveID = (gl_PrimitiveID == 1000 || gl_PrimitiveID == 1001);\n"
1839          << "    bool goodViewportIndex = (gl_ViewportIndex == 1);\n"
1840          << "    bool goodCustom1 = (customAttribute1.x >= 0.25 && customAttribute1.x <= 0.5 &&\n"
1841          << "                        customAttribute1.y >= 0.5  && customAttribute1.y <= 1.0 &&\n"
1842          << "                        customAttribute1.z >= 10.0 && customAttribute1.z <= 20.0 &&\n"
1843          << "                        customAttribute1.w == 3.0);\n"
1844          << "    bool goodCustom2 = (customAttribute2 == 1.0 || customAttribute2 == 2.0);\n"
1845          << "    bool goodCustom3 = (customAttribute3 == 3 || customAttribute3 == 4);\n"
1846          << "    bool goodCustom4 = ((gl_PrimitiveID == 1000 && customAttribute4 == uvec4(100, 101, 102, 103)) ||\n"
1847          << "                        (gl_PrimitiveID == 1001 && customAttribute4 == uvec4(200, 201, 202, 203)));\n"
1848          << "    bool goodCustom5 = ((gl_PrimitiveID == 1000 && customAttribute5 == 6.0) ||\n"
1849          << "                        (gl_PrimitiveID == 1001 && customAttribute5 == 7.0));\n"
1850          << "    \n"
1851          << "    if (goodPrimitiveID && goodViewportIndex && goodCustom1 && goodCustom2 && goodCustom3 && goodCustom4 "
1852             "&& goodCustom5) {\n"
1853          << "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
1854          << "    } else {\n"
1855          << "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
1856          << "    }\n"
1857          << "}\n";
1858     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1859 
1860     std::ostringstream pvdDataDeclStream;
1861     pvdDataDeclStream << "    vec4 positions[4];\n"
1862                       << "    float pointSizes[4];\n"
1863                       << "    float clipDistances[4];\n"
1864                       << "    vec4 custom1[4];\n"
1865                       << "    float custom2[4];\n"
1866                       << "    int custom3[4];\n";
1867     const auto pvdDataDecl = pvdDataDeclStream.str();
1868 
1869     std::ostringstream ppdDataDeclStream;
1870     ppdDataDeclStream << "    int primitiveIds[2];\n"
1871                       << "    int viewportIndices[2];\n"
1872                       << "    uvec4 custom4[2];\n"
1873                       << "    float custom5[2];\n";
1874     const auto ppdDataDecl = ppdDataDeclStream.str();
1875 
1876     std::ostringstream bindingsDeclStream;
1877     bindingsDeclStream << "layout (set=0, binding=0, std430) buffer PerVertexData {\n"
1878                        << pvdDataDecl << "} pvd;\n"
1879                        << "layout (set=0, binding=1) uniform PerPrimitiveData {\n"
1880                        << ppdDataDecl << "} ppd;\n"
1881                        << "\n";
1882     const auto bindingsDecl = bindingsDeclStream.str();
1883 
1884     std::ostringstream taskDataStream;
1885     taskDataStream << "taskNV TaskData {\n"
1886                    << pvdDataDecl << ppdDataDecl << "} td;\n"
1887                    << "\n";
1888     const auto taskDataDecl = taskDataStream.str();
1889 
1890     const auto taskShader = m_params->needsTaskShader();
1891 
1892     const auto meshPvdPrefix = (taskShader ? "td" : "pvd");
1893     const auto meshPpdPrefix = (taskShader ? "td" : "ppd");
1894 
1895     std::ostringstream mesh;
1896     mesh << "#version 450\n"
1897          << "#extension GL_NV_mesh_shader : enable\n"
1898          << "\n"
1899          << "layout (local_size_x=1) in;\n"
1900          << "layout (max_primitives=2, max_vertices=4) out;\n"
1901          << "layout (triangles) out;\n"
1902          << "\n"
1903          << "out gl_MeshPerVertexNV {\n"
1904          << "    vec4  gl_Position;\n"
1905          << "    float gl_PointSize;\n"
1906          << "    float gl_ClipDistance[1];\n"
1907          << "} gl_MeshVerticesNV[];\n"
1908          << "\n"
1909          << "layout (location=0) out vec4 customAttribute1[];\n"
1910          << "layout (location=1) out flat float customAttribute2[];\n"
1911          << "layout (location=2) out int customAttribute3[];\n"
1912          << "\n"
1913          << "layout (location=3) out perprimitiveNV uvec4 customAttribute4[];\n"
1914          << "layout (location=4) out perprimitiveNV float customAttribute5[];\n"
1915          << "\n"
1916          << "out perprimitiveNV gl_MeshPerPrimitiveNV {\n"
1917          << "  int gl_PrimitiveID;\n"
1918          << "  int gl_ViewportIndex;\n"
1919          << "} gl_MeshPrimitivesNV[];\n"
1920          << "\n"
1921          << (taskShader ? "in " + taskDataDecl : bindingsDecl) << "void main ()\n"
1922          << "{\n"
1923          << "    gl_PrimitiveCountNV = 2u;\n"
1924          << "\n"
1925          << "    gl_MeshVerticesNV[0].gl_Position = " << meshPvdPrefix
1926          << ".positions[0]; //vec4(-1.0, -1.0, 0.0, 1.0)\n"
1927          << "    gl_MeshVerticesNV[1].gl_Position = " << meshPvdPrefix
1928          << ".positions[1]; //vec4( 1.0, -1.0, 0.0, 1.0)\n"
1929          << "    gl_MeshVerticesNV[2].gl_Position = " << meshPvdPrefix
1930          << ".positions[2]; //vec4(-1.0,  1.0, 0.0, 1.0)\n"
1931          << "    gl_MeshVerticesNV[3].gl_Position = " << meshPvdPrefix
1932          << ".positions[3]; //vec4( 1.0,  1.0, 0.0, 1.0)\n"
1933          << "\n"
1934          << "    gl_MeshVerticesNV[0].gl_PointSize = " << meshPvdPrefix << ".pointSizes[0]; //1.0\n"
1935          << "    gl_MeshVerticesNV[1].gl_PointSize = " << meshPvdPrefix << ".pointSizes[1]; //1.0\n"
1936          << "    gl_MeshVerticesNV[2].gl_PointSize = " << meshPvdPrefix << ".pointSizes[2]; //1.0\n"
1937          << "    gl_MeshVerticesNV[3].gl_PointSize = " << meshPvdPrefix << ".pointSizes[3]; //1.0\n"
1938          << "\n"
1939          << "    // Remove geometry on the right side.\n"
1940          << "    gl_MeshVerticesNV[0].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[0]; // 1.0\n"
1941          << "    gl_MeshVerticesNV[1].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[1]; //-1.0\n"
1942          << "    gl_MeshVerticesNV[2].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[2]; // 1.0\n"
1943          << "    gl_MeshVerticesNV[3].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[3]; //-1.0\n"
1944          << "    \n"
1945          << "    gl_PrimitiveIndicesNV[0] = 0;\n"
1946          << "    gl_PrimitiveIndicesNV[1] = 2;\n"
1947          << "    gl_PrimitiveIndicesNV[2] = 1;\n"
1948          << "\n"
1949          << "    gl_PrimitiveIndicesNV[3] = 2;\n"
1950          << "    gl_PrimitiveIndicesNV[4] = 3;\n"
1951          << "    gl_PrimitiveIndicesNV[5] = 1;\n"
1952          << "\n"
1953          << "    gl_MeshPrimitivesNV[0].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[0]; //1000\n"
1954          << "    gl_MeshPrimitivesNV[1].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[1]; //1001\n"
1955          << "\n"
1956          << "    gl_MeshPrimitivesNV[0].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[0]; //1\n"
1957          << "    gl_MeshPrimitivesNV[1].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[1]; //1\n"
1958          << "\n"
1959          << "    // Custom per-vertex attributes\n"
1960          << "    customAttribute1[0] = " << meshPvdPrefix << ".custom1[0]; //vec4(0.25, 0.5, 10.0, 3.0)\n"
1961          << "    customAttribute1[1] = " << meshPvdPrefix << ".custom1[1]; //vec4(0.25, 1.0, 20.0, 3.0)\n"
1962          << "    customAttribute1[2] = " << meshPvdPrefix << ".custom1[2]; //vec4( 0.5, 0.5, 20.0, 3.0)\n"
1963          << "    customAttribute1[3] = " << meshPvdPrefix << ".custom1[3]; //vec4( 0.5, 1.0, 10.0, 3.0)\n"
1964          << "\n"
1965          << "    customAttribute2[0] = " << meshPvdPrefix << ".custom2[0]; //1.0f\n"
1966          << "    customAttribute2[1] = " << meshPvdPrefix << ".custom2[1]; //1.0f\n"
1967          << "    customAttribute2[2] = " << meshPvdPrefix << ".custom2[2]; //2.0f\n"
1968          << "    customAttribute2[3] = " << meshPvdPrefix << ".custom2[3]; //2.0f\n"
1969          << "\n"
1970          << "    customAttribute3[0] = " << meshPvdPrefix << ".custom3[0]; //3\n"
1971          << "    customAttribute3[1] = " << meshPvdPrefix << ".custom3[1]; //3\n"
1972          << "    customAttribute3[2] = " << meshPvdPrefix << ".custom3[2]; //4\n"
1973          << "    customAttribute3[3] = " << meshPvdPrefix << ".custom3[3]; //4\n"
1974          << "\n"
1975          << "    // Custom per-primitive attributes.\n"
1976          << "    customAttribute4[0] = " << meshPpdPrefix << ".custom4[0]; //uvec4(100, 101, 102, 103)\n"
1977          << "    customAttribute4[1] = " << meshPpdPrefix << ".custom4[1]; //uvec4(200, 201, 202, 203)\n"
1978          << "\n"
1979          << "    customAttribute5[0] = " << meshPpdPrefix << ".custom5[0]; //6.0\n"
1980          << "    customAttribute5[1] = " << meshPpdPrefix << ".custom5[1]; //7.0\n"
1981          << "}\n";
1982     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
1983 
1984     if (taskShader)
1985     {
1986         std::ostringstream task;
1987         task << "#version 450\n"
1988              << "#extension GL_NV_mesh_shader : enable\n"
1989              << "\n"
1990              << "out " << taskDataDecl << bindingsDecl << "void main ()\n"
1991              << "{\n"
1992              << "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
1993              << "\n"
1994              << "    td.positions[0] = pvd.positions[0];\n"
1995              << "    td.positions[1] = pvd.positions[1];\n"
1996              << "    td.positions[2] = pvd.positions[2];\n"
1997              << "    td.positions[3] = pvd.positions[3];\n"
1998              << "\n"
1999              << "    td.pointSizes[0] = pvd.pointSizes[0];\n"
2000              << "    td.pointSizes[1] = pvd.pointSizes[1];\n"
2001              << "    td.pointSizes[2] = pvd.pointSizes[2];\n"
2002              << "    td.pointSizes[3] = pvd.pointSizes[3];\n"
2003              << "\n"
2004              << "    td.clipDistances[0] = pvd.clipDistances[0];\n"
2005              << "    td.clipDistances[1] = pvd.clipDistances[1];\n"
2006              << "    td.clipDistances[2] = pvd.clipDistances[2];\n"
2007              << "    td.clipDistances[3] = pvd.clipDistances[3];\n"
2008              << "\n"
2009              << "    td.custom1[0] = pvd.custom1[0];\n"
2010              << "    td.custom1[1] = pvd.custom1[1];\n"
2011              << "    td.custom1[2] = pvd.custom1[2];\n"
2012              << "    td.custom1[3] = pvd.custom1[3];\n"
2013              << "\n"
2014              << "    td.custom2[0] = pvd.custom2[0];\n"
2015              << "    td.custom2[1] = pvd.custom2[1];\n"
2016              << "    td.custom2[2] = pvd.custom2[2];\n"
2017              << "    td.custom2[3] = pvd.custom2[3];\n"
2018              << "\n"
2019              << "    td.custom3[0] = pvd.custom3[0];\n"
2020              << "    td.custom3[1] = pvd.custom3[1];\n"
2021              << "    td.custom3[2] = pvd.custom3[2];\n"
2022              << "    td.custom3[3] = pvd.custom3[3];\n"
2023              << "\n"
2024              << "    td.primitiveIds[0] = ppd.primitiveIds[0];\n"
2025              << "    td.primitiveIds[1] = ppd.primitiveIds[1];\n"
2026              << "\n"
2027              << "    td.viewportIndices[0] = ppd.viewportIndices[0];\n"
2028              << "    td.viewportIndices[1] = ppd.viewportIndices[1];\n"
2029              << "\n"
2030              << "    td.custom4[0] = ppd.custom4[0];\n"
2031              << "    td.custom4[1] = ppd.custom4[1];\n"
2032              << "\n"
2033              << "    td.custom5[0] = ppd.custom5[0];\n"
2034              << "    td.custom5[1] = ppd.custom5[1];\n"
2035              << "}\n";
2036         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
2037     }
2038 }
2039 
generateReferenceLevel()2040 void CustomAttributesInstance::generateReferenceLevel()
2041 {
2042     const auto format    = getOutputFormat();
2043     const auto tcuFormat = mapVkFormat(format);
2044 
2045     const auto iWidth  = static_cast<int>(m_params->width);
2046     const auto iHeight = static_cast<int>(m_params->height);
2047 
2048     const auto halfWidth  = iWidth / 2;
2049     const auto halfHeight = iHeight / 2;
2050 
2051     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
2052 
2053     const auto access     = m_referenceLevel->getAccess();
2054     const auto clearColor = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
2055     const auto blueColor  = tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
2056 
2057     tcu::clear(access, clearColor);
2058 
2059     // Fill the top left quarter.
2060     for (int y = 0; y < halfWidth; ++y)
2061         for (int x = 0; x < halfHeight; ++x)
2062         {
2063             access.setPixel(blueColor, x, y);
2064         }
2065 }
2066 
iterate()2067 tcu::TestStatus CustomAttributesInstance::iterate()
2068 {
2069     struct PerVertexData
2070     {
2071         tcu::Vec4 positions[4];
2072         float pointSizes[4];
2073         float clipDistances[4];
2074         tcu::Vec4 custom1[4];
2075         float custom2[4];
2076         int32_t custom3[4];
2077     };
2078 
2079     struct PerPrimitiveData
2080     {
2081         // Note some of these are declared as vectors to match the std140 layout.
2082         tcu::IVec4 primitiveIds[2];
2083         tcu::IVec4 viewportIndices[2];
2084         tcu::UVec4 custom4[2];
2085         tcu::Vec4 custom5[2];
2086     };
2087 
2088     const auto &vkd       = m_context.getDeviceInterface();
2089     const auto device     = m_context.getDevice();
2090     auto &alloc           = m_context.getDefaultAllocator();
2091     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
2092     const auto queue      = m_context.getUniversalQueue();
2093 
2094     const auto imageFormat = getOutputFormat();
2095     const auto tcuFormat   = mapVkFormat(imageFormat);
2096     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
2097     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2098 
2099     const auto &binaries = m_context.getBinaryCollection();
2100     const auto hasTask   = binaries.contains("task");
2101     const auto bufStages = (hasTask ? VK_SHADER_STAGE_TASK_BIT_NV : VK_SHADER_STAGE_MESH_BIT_NV);
2102 
2103     const VkImageCreateInfo colorBufferInfo = {
2104         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2105         nullptr,                             // const void* pNext;
2106         0u,                                  // VkImageCreateFlags flags;
2107         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
2108         imageFormat,                         // VkFormat format;
2109         imageExtent,                         // VkExtent3D extent;
2110         1u,                                  // uint32_t mipLevels;
2111         1u,                                  // uint32_t arrayLayers;
2112         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
2113         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
2114         imageUsage,                          // VkImageUsageFlags usage;
2115         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
2116         0u,                                  // uint32_t queueFamilyIndexCount;
2117         nullptr,                             // const uint32_t* pQueueFamilyIndices;
2118         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
2119     };
2120 
2121     // Create color image and view.
2122     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2123     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2124     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2125     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2126 
2127     // Create a memory buffer for verification.
2128     const auto verificationBufferSize =
2129         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2130     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2131     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2132 
2133     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2134     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
2135     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
2136 
2137     // This needs to match what the fragment shader will expect.
2138     const PerVertexData perVertexData = {
2139         // tcu::Vec4 positions[4];
2140         {
2141             tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
2142             tcu::Vec4(1.0f, -1.0f, 0.0f, 1.0f),
2143             tcu::Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
2144             tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
2145         },
2146         // float pointSizes[4];
2147         {
2148             1.0f,
2149             1.0f,
2150             1.0f,
2151             1.0f,
2152         },
2153         // float clipDistances[4];
2154         {
2155             1.0f,
2156             -1.0f,
2157             1.0f,
2158             -1.0f,
2159         },
2160         // tcu::Vec4 custom1[4];
2161         {
2162             tcu::Vec4(0.25, 0.5, 10.0, 3.0),
2163             tcu::Vec4(0.25, 1.0, 20.0, 3.0),
2164             tcu::Vec4(0.5, 0.5, 20.0, 3.0),
2165             tcu::Vec4(0.5, 1.0, 10.0, 3.0),
2166         },
2167         // float custom2[4];
2168         {
2169             1.0f,
2170             1.0f,
2171             2.0f,
2172             2.0f,
2173         },
2174         // int32_t custom3[4];
2175         {3, 3, 4, 4},
2176     };
2177 
2178     // This needs to match what the fragment shader will expect. Reminder: some of these are declared as gvec4 to match the std140
2179     // layout, but only the first component is actually used.
2180     const PerPrimitiveData perPrimitiveData = {
2181         // int primitiveIds[2];
2182         {
2183             tcu::IVec4(1000, 0, 0, 0),
2184             tcu::IVec4(1001, 0, 0, 0),
2185         },
2186         // int viewportIndices[2];
2187         {
2188             tcu::IVec4(1, 0, 0, 0),
2189             tcu::IVec4(1, 0, 0, 0),
2190         },
2191         // uvec4 custom4[2];
2192         {
2193             tcu::UVec4(100u, 101u, 102u, 103u),
2194             tcu::UVec4(200u, 201u, 202u, 203u),
2195         },
2196         // float custom5[2];
2197         {
2198             tcu::Vec4(6.0f, 0.0f, 0.0f, 0.0f),
2199             tcu::Vec4(7.0f, 0.0f, 0.0f, 0.0f),
2200         },
2201     };
2202 
2203     // Create and fill buffers with this data.
2204     const auto pvdSize = static_cast<VkDeviceSize>(sizeof(perVertexData));
2205     const auto pvdInfo = makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2206     BufferWithMemory pvdData(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
2207     auto &pvdAlloc = pvdData.getAllocation();
2208     void *pvdPtr   = pvdAlloc.getHostPtr();
2209 
2210     const auto ppdSize = static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
2211     const auto ppdInfo = makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
2212     BufferWithMemory ppdData(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
2213     auto &ppdAlloc = ppdData.getAllocation();
2214     void *ppdPtr   = ppdAlloc.getHostPtr();
2215 
2216     deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
2217     deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
2218 
2219     flushAlloc(vkd, device, pvdAlloc);
2220     flushAlloc(vkd, device, ppdAlloc);
2221 
2222     // Descriptor set layout.
2223     DescriptorSetLayoutBuilder setLayoutBuilder;
2224     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
2225     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, bufStages);
2226     const auto setLayout = setLayoutBuilder.build(vkd, device);
2227 
2228     // Create and update descriptor set.
2229     DescriptorPoolBuilder descriptorPoolBuilder;
2230     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2231     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
2232     const auto descriptorPool =
2233         descriptorPoolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2234     const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
2235 
2236     DescriptorSetUpdateBuilder updateBuilder;
2237     const auto storageBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
2238     const auto uniformBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
2239     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
2240                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageBufferInfo);
2241     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
2242                               VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferInfo);
2243     updateBuilder.update(vkd, device);
2244 
2245     // Pipeline layout.
2246     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
2247 
2248     // Shader modules.
2249     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
2250     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
2251 
2252     Move<VkShaderModule> taskShader;
2253     if (hasTask)
2254         taskShader = createShaderModule(vkd, device, binaries.get("task"));
2255 
2256     // Render pass.
2257     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2258 
2259     // Framebuffer.
2260     const auto framebuffer =
2261         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2262 
2263     // Viewport and scissor.
2264     const auto topHalf = makeViewport(imageExtent.width, imageExtent.height / 2u);
2265     const std::vector<VkViewport> viewports{makeViewport(imageExtent), topHalf};
2266     const std::vector<VkRect2D> scissors(2u, makeRect2D(imageExtent));
2267 
2268     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
2269                                                fragShader.get(), renderPass.get(), viewports, scissors);
2270 
2271     // Command pool and buffer.
2272     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
2273     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2274     const auto cmdBuffer    = cmdBufferPtr.get();
2275 
2276     beginCommandBuffer(vkd, cmdBuffer);
2277 
2278     // Run pipeline.
2279     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
2280     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2281     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2282     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
2283                               &descriptorSet.get(), 0u, nullptr);
2284     vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
2285     endRenderPass(vkd, cmdBuffer);
2286 
2287     // Copy color buffer to verification buffer.
2288     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2289     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
2290     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
2291     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
2292 
2293     const auto preCopyBarrier =
2294         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2295                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2296     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
2297     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
2298 
2299     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
2300                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2301     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2302                              verificationBuffer.get(), 1u, &copyRegion);
2303     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
2304                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2305 
2306     endCommandBuffer(vkd, cmdBuffer);
2307     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2308 
2309     // Generate reference image and compare results.
2310     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2311     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
2312 
2313     generateReferenceLevel();
2314     invalidateAlloc(vkd, device, verificationBufferAlloc);
2315     if (!verifyResult(verificationAccess))
2316         TCU_FAIL("Result does not match reference; check log for details");
2317 
2318     return tcu::TestStatus::pass("Pass");
2319 }
2320 
2321 // Tests that use push constants in the new stages.
2322 class PushConstantCase : public MeshShaderMiscCase
2323 {
2324 public:
PushConstantCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2325     PushConstantCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2326         : MeshShaderMiscCase(testCtx, name, std::move(params))
2327     {
2328     }
2329 
2330     void initPrograms(vk::SourceCollections &programCollection) const override;
2331     TestInstance *createInstance(Context &context) const override;
2332 };
2333 
2334 class PushConstantInstance : public MeshShaderMiscInstance
2335 {
2336 public:
PushConstantInstance(Context & context,const MiscTestParams * params)2337     PushConstantInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2338     {
2339     }
2340 
2341     void generateReferenceLevel() override;
2342     tcu::TestStatus iterate() override;
2343 };
2344 
createInstance(Context & context) const2345 TestInstance *PushConstantCase::createInstance(Context &context) const
2346 {
2347     return new PushConstantInstance(context, m_params.get());
2348 }
2349 
generateReferenceLevel()2350 void PushConstantInstance::generateReferenceLevel()
2351 {
2352     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2353 }
2354 
initPrograms(vk::SourceCollections & programCollection) const2355 void PushConstantCase::initPrograms(vk::SourceCollections &programCollection) const
2356 {
2357     const auto useTaskShader = m_params->needsTaskShader();
2358     const auto pcNumFloats   = (useTaskShader ? 2u : 4u);
2359 
2360     std::ostringstream pushConstantStream;
2361     pushConstantStream << "layout (push_constant, std430) uniform PushConstantBlock {\n"
2362                        << "    layout (offset=${PCOFFSET}) float values[" << pcNumFloats << "];\n"
2363                        << "} pc;\n"
2364                        << "\n";
2365     const tcu::StringTemplate pushConstantsTemplate(pushConstantStream.str());
2366     using TemplateMap = std::map<std::string, std::string>;
2367 
2368     std::ostringstream taskDataStream;
2369     taskDataStream << "taskNV TaskData {\n"
2370                    << "    float values[2];\n"
2371                    << "} td;\n"
2372                    << "\n";
2373     const auto taskDataDecl = taskDataStream.str();
2374 
2375     if (useTaskShader)
2376     {
2377         TemplateMap taskMap;
2378         taskMap["PCOFFSET"] = std::to_string(2u * sizeof(float));
2379 
2380         std::ostringstream task;
2381         task << "#version 450\n"
2382              << "#extension GL_NV_mesh_shader : enable\n"
2383              << "\n"
2384              << "layout(local_size_x=1) in;\n"
2385              << "\n"
2386              << "out " << taskDataDecl << pushConstantsTemplate.specialize(taskMap) << "void main ()\n"
2387              << "{\n"
2388              << "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
2389              << "\n"
2390              << "    td.values[0] = pc.values[0];\n"
2391              << "    td.values[1] = pc.values[1];\n"
2392              << "}\n";
2393         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
2394     }
2395 
2396     {
2397         const std::string blue  = (useTaskShader ? "td.values[0] + pc.values[0]" : "pc.values[0] + pc.values[2]");
2398         const std::string alpha = (useTaskShader ? "td.values[1] + pc.values[1]" : "pc.values[1] + pc.values[3]");
2399 
2400         TemplateMap meshMap;
2401         meshMap["PCOFFSET"] = "0";
2402 
2403         std::ostringstream mesh;
2404         mesh << "#version 450\n"
2405              << "#extension GL_NV_mesh_shader : enable\n"
2406              << "\n"
2407              << "layout(local_size_x=1) in;\n"
2408              << "layout(triangles) out;\n"
2409              << "layout(max_vertices=3, max_primitives=1) out;\n"
2410              << "\n"
2411              << "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
2412              << "\n"
2413              << pushConstantsTemplate.specialize(meshMap) << (useTaskShader ? "in " + taskDataDecl : "")
2414              << "void main ()\n"
2415              << "{\n"
2416              << "    gl_PrimitiveCountNV = 1;\n"
2417              << "\n"
2418              << "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
2419              << "    gl_MeshVerticesNV[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
2420              << "    gl_MeshVerticesNV[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
2421              << "\n"
2422              << "    gl_PrimitiveIndicesNV[0] = 0;\n"
2423              << "    gl_PrimitiveIndicesNV[1] = 1;\n"
2424              << "    gl_PrimitiveIndicesNV[2] = 2;\n"
2425              << "\n"
2426              << "    triangleColor[0] = vec4(0.0, 0.0, " << blue << ", " << alpha << ");\n"
2427              << "}\n";
2428         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2429     }
2430 
2431     // Add default fragment shader.
2432     MeshShaderMiscCase::initPrograms(programCollection);
2433 }
2434 
iterate()2435 tcu::TestStatus PushConstantInstance::iterate()
2436 {
2437     const auto &vkd       = m_context.getDeviceInterface();
2438     const auto device     = m_context.getDevice();
2439     auto &alloc           = m_context.getDefaultAllocator();
2440     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
2441     const auto queue      = m_context.getUniversalQueue();
2442 
2443     const auto imageFormat = getOutputFormat();
2444     const auto tcuFormat   = mapVkFormat(imageFormat);
2445     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
2446     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2447 
2448     const auto &binaries = m_context.getBinaryCollection();
2449     const auto hasTask   = binaries.contains("task");
2450 
2451     const VkImageCreateInfo colorBufferInfo = {
2452         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2453         nullptr,                             // const void* pNext;
2454         0u,                                  // VkImageCreateFlags flags;
2455         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
2456         imageFormat,                         // VkFormat format;
2457         imageExtent,                         // VkExtent3D extent;
2458         1u,                                  // uint32_t mipLevels;
2459         1u,                                  // uint32_t arrayLayers;
2460         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
2461         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
2462         imageUsage,                          // VkImageUsageFlags usage;
2463         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
2464         0u,                                  // uint32_t queueFamilyIndexCount;
2465         nullptr,                             // const uint32_t* pQueueFamilyIndices;
2466         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
2467     };
2468 
2469     // Create color image and view.
2470     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2471     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2472     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2473     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2474 
2475     // Create a memory buffer for verification.
2476     const auto verificationBufferSize =
2477         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2478     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2479     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2480 
2481     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2482     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
2483     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
2484 
2485     // Push constant ranges.
2486     std::vector<float> pcData{0.25f, 0.25f, 0.75f, 0.75f};
2487     const auto pcSize     = static_cast<uint32_t>(de::dataSize(pcData));
2488     const auto pcHalfSize = pcSize / 2u;
2489 
2490     std::vector<VkPushConstantRange> pcRanges;
2491     if (hasTask)
2492     {
2493         pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_NV, 0u, pcHalfSize));
2494         pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_NV, pcHalfSize, pcHalfSize));
2495     }
2496     else
2497     {
2498         pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_NV, 0u, pcSize));
2499     }
2500 
2501     // Pipeline layout.
2502     const auto pipelineLayout =
2503         makePipelineLayout(vkd, device, 0u, nullptr, static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
2504 
2505     // Shader modules.
2506     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
2507     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
2508 
2509     Move<VkShaderModule> taskShader;
2510     if (hasTask)
2511         taskShader = createShaderModule(vkd, device, binaries.get("task"));
2512 
2513     // Render pass.
2514     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2515 
2516     // Framebuffer.
2517     const auto framebuffer =
2518         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2519 
2520     // Viewport and scissor.
2521     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
2522     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
2523 
2524     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
2525                                                fragShader.get(), renderPass.get(), viewports, scissors);
2526 
2527     // Command pool and buffer.
2528     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
2529     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2530     const auto cmdBuffer    = cmdBufferPtr.get();
2531 
2532     beginCommandBuffer(vkd, cmdBuffer);
2533 
2534     // Run pipeline.
2535     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
2536     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2537     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2538     for (const auto &range : pcRanges)
2539         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size,
2540                              reinterpret_cast<const char *>(pcData.data()) + range.offset);
2541     vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
2542     endRenderPass(vkd, cmdBuffer);
2543 
2544     // Copy color buffer to verification buffer.
2545     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2546     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
2547     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
2548     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
2549 
2550     const auto preCopyBarrier =
2551         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2552                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2553     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
2554     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
2555 
2556     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
2557                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2558     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2559                              verificationBuffer.get(), 1u, &copyRegion);
2560     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
2561                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2562 
2563     endCommandBuffer(vkd, cmdBuffer);
2564     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2565 
2566     // Generate reference image and compare results.
2567     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2568     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
2569 
2570     generateReferenceLevel();
2571     invalidateAlloc(vkd, device, verificationBufferAlloc);
2572     if (!verifyResult(verificationAccess))
2573         TCU_FAIL("Result does not match reference; check log for details");
2574 
2575     return tcu::TestStatus::pass("Pass");
2576 }
2577 
2578 // Use large work group size, large number of vertices and large number of primitives.
2579 struct MaximizeThreadsParams : public MiscTestParams
2580 {
MaximizeThreadsParamsvkt::MeshShader::__anon5941fe3f0111::MaximizeThreadsParams2581     MaximizeThreadsParams(const tcu::Maybe<uint32_t> &taskCount_, uint32_t meshCount_, uint32_t width_,
2582                           uint32_t height_, uint32_t localSize_, uint32_t numVertices_, uint32_t numPrimitives_)
2583         : MiscTestParams(taskCount_, meshCount_, width_, height_)
2584         , localSize(localSize_)
2585         , numVertices(numVertices_)
2586         , numPrimitives(numPrimitives_)
2587     {
2588     }
2589 
2590     uint32_t localSize;
2591     uint32_t numVertices;
2592     uint32_t numPrimitives;
2593 
checkSupportvkt::MeshShader::__anon5941fe3f0111::MaximizeThreadsParams2594     void checkSupport(Context &context) const
2595     {
2596         const auto &properties = context.getMeshShaderProperties();
2597 
2598         if (localSize > properties.maxMeshWorkGroupSize[0])
2599             TCU_THROW(NotSupportedError, "Required local size not supported");
2600 
2601         if (numVertices > properties.maxMeshOutputVertices)
2602             TCU_THROW(NotSupportedError, "Required number of output vertices not supported");
2603 
2604         if (numPrimitives > properties.maxMeshOutputPrimitives)
2605             TCU_THROW(NotSupportedError, "Required number of output primitives not supported");
2606     }
2607 };
2608 
2609 // Focus on the number of primitives.
2610 class MaximizePrimitivesCase : public MeshShaderMiscCase
2611 {
2612 public:
MaximizePrimitivesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2613     MaximizePrimitivesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2614         : MeshShaderMiscCase(testCtx, name, std::move(params))
2615     {
2616         const auto mtParams = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2617         DE_ASSERT(mtParams);
2618         DE_UNREF(mtParams); // For release builds.
2619     }
2620 
2621     void initPrograms(vk::SourceCollections &programCollection) const override;
2622     void checkSupport(Context &context) const override;
2623     TestInstance *createInstance(Context &context) const override;
2624 };
2625 
2626 class MaximizePrimitivesInstance : public MeshShaderMiscInstance
2627 {
2628 public:
MaximizePrimitivesInstance(Context & context,const MiscTestParams * params)2629     MaximizePrimitivesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2630     {
2631     }
2632 
2633     void generateReferenceLevel() override;
2634 };
2635 
createInstance(Context & context) const2636 TestInstance *MaximizePrimitivesCase::createInstance(Context &context) const
2637 {
2638     return new MaximizePrimitivesInstance(context, m_params.get());
2639 }
2640 
checkSupport(Context & context) const2641 void MaximizePrimitivesCase::checkSupport(Context &context) const
2642 {
2643     MeshShaderMiscCase::checkSupport(context);
2644 
2645     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2646     params->checkSupport(context);
2647 }
2648 
initPrograms(vk::SourceCollections & programCollection) const2649 void MaximizePrimitivesCase::initPrograms(vk::SourceCollections &programCollection) const
2650 {
2651     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2652 
2653     DE_ASSERT(!params->needsTaskShader());
2654     MeshShaderMiscCase::initPrograms(programCollection);
2655 
2656     // Idea behind the test: generate 128 vertices, 1 per each pixel in a 128x1 image. Then, use each vertex to generate two points,
2657     // adding the colors of each point using color blending to make sure every point is properly generated.
2658 
2659     DE_ASSERT(params->numPrimitives == params->numVertices * 2u);
2660     DE_ASSERT(params->numVertices == params->width);
2661 
2662     const auto verticesPerInvocation = params->numVertices / params->localSize;
2663     const auto primitivesPerVertex   = params->numPrimitives / params->numVertices;
2664 
2665     std::ostringstream mesh;
2666     mesh << "#version 450\n"
2667          << "#extension GL_NV_mesh_shader : enable\n"
2668          << "\n"
2669          << "layout(local_size_x=" << params->localSize << ") in;\n"
2670          << "layout(points) out;\n"
2671          << "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2672          << "\n"
2673          << "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
2674          << "\n"
2675          << "const uint verticesPerInvocation = " << verticesPerInvocation << ";\n"
2676          << "const uint primitivesPerVertex   = " << primitivesPerVertex << ";\n"
2677          << "\n"
2678          << "vec4 colors[primitivesPerVertex] = vec4[](\n"
2679          << "    vec4(0.0, 0.0, 1.0, 1.0),\n"
2680          << "    vec4(1.0, 0.0, 0.0, 1.0)\n"
2681          << ");\n"
2682          << "void main ()\n"
2683          << "{\n"
2684          << "    gl_PrimitiveCountNV = " << params->numPrimitives << ";\n"
2685          << "    const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation;\n"
2686          << "    for (uint i = 0u; i < verticesPerInvocation; ++i)\n"
2687          << "    {\n"
2688          << "        const uint vertexNumber = firstVertex + i;\n"
2689          << "        const float xCoord = ((float(vertexNumber) + 0.5) / " << params->width << ".0) * 2.0 - 1.0;\n"
2690          << "        const float yCoord = 0.0;\n"
2691          << "        gl_MeshVerticesNV[vertexNumber].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
2692          << "        gl_MeshVerticesNV[vertexNumber].gl_PointSize = 1.0f;\n"
2693          << "        for (uint j = 0u; j < primitivesPerVertex; ++j)\n"
2694          << "        {\n"
2695          << "            const uint primitiveNumber = vertexNumber * primitivesPerVertex + j;\n"
2696          << "            gl_PrimitiveIndicesNV[primitiveNumber] = vertexNumber;\n"
2697          << "            pointColor[primitiveNumber] = colors[j];\n"
2698          << "        }\n"
2699          << "    }\n"
2700          << "}\n";
2701     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2702 }
2703 
generateReferenceLevel()2704 void MaximizePrimitivesInstance::generateReferenceLevel()
2705 {
2706     generateSolidRefLevel(tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2707 }
2708 
2709 // Focus on the number of vertices.
2710 class MaximizeVerticesCase : public MeshShaderMiscCase
2711 {
2712 public:
MaximizeVerticesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2713     MaximizeVerticesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2714         : MeshShaderMiscCase(testCtx, name, std::move(params))
2715     {
2716         const auto mtParams = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2717         DE_ASSERT(mtParams);
2718         DE_UNREF(mtParams); // For release builds.
2719     }
2720 
2721     void initPrograms(vk::SourceCollections &programCollection) const override;
2722     void checkSupport(Context &context) const override;
2723     TestInstance *createInstance(Context &context) const override;
2724 };
2725 
2726 class MaximizeVerticesInstance : public MeshShaderMiscInstance
2727 {
2728 public:
MaximizeVerticesInstance(Context & context,const MiscTestParams * params)2729     MaximizeVerticesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2730     {
2731     }
2732 
2733     void generateReferenceLevel() override;
2734 };
2735 
createInstance(Context & context) const2736 TestInstance *MaximizeVerticesCase::createInstance(Context &context) const
2737 {
2738     return new MaximizeVerticesInstance(context, m_params.get());
2739 }
2740 
checkSupport(Context & context) const2741 void MaximizeVerticesCase::checkSupport(Context &context) const
2742 {
2743     MeshShaderMiscCase::checkSupport(context);
2744 
2745     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2746     params->checkSupport(context);
2747 }
2748 
initPrograms(vk::SourceCollections & programCollection) const2749 void MaximizeVerticesCase::initPrograms(vk::SourceCollections &programCollection) const
2750 {
2751     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2752 
2753     DE_ASSERT(!params->needsTaskShader());
2754     MeshShaderMiscCase::initPrograms(programCollection);
2755 
2756     // Idea behind the test: cover a framebuffer using a triangle quad per pixel (4 vertices, 2 triangles).
2757     DE_ASSERT(params->numVertices == params->numPrimitives * 2u);
2758     DE_ASSERT(params->numPrimitives == params->width * 2u);
2759 
2760     const auto pixelsPerInvocation     = params->width / params->localSize;
2761     const auto verticesPerPixel        = 4u;
2762     const auto primitivesPerPixel      = 2u;
2763     const auto verticesPerInvocation   = pixelsPerInvocation * verticesPerPixel;
2764     const auto primitivesPerInvocation = pixelsPerInvocation * primitivesPerPixel;
2765 
2766     std::ostringstream mesh;
2767     mesh << "#version 450\n"
2768          << "#extension GL_NV_mesh_shader : enable\n"
2769          << "\n"
2770          << "layout(local_size_x=" << params->localSize << ") in;\n"
2771          << "layout(triangles) out;\n"
2772          << "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2773          << "\n"
2774          << "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
2775          << "\n"
2776          << "const uint pixelsPerInvocation     = " << pixelsPerInvocation << ";\n"
2777          << "const uint verticesPerInvocation   = " << verticesPerInvocation << ";\n"
2778          << "const uint primitivesPerInvocation = " << primitivesPerInvocation << ";\n"
2779          << "const uint indicesPerInvocation    = primitivesPerInvocation * 3u;\n"
2780          << "const uint verticesPerPixel        = " << verticesPerPixel << ";\n"
2781          << "const uint primitivesPerPixel      = " << primitivesPerPixel << ";\n"
2782          << "const uint indicesPerPixel         = primitivesPerPixel * 3u;\n"
2783          << "\n"
2784          << "void main ()\n"
2785          << "{\n"
2786          << "    gl_PrimitiveCountNV = " << params->numPrimitives << ";\n"
2787          << "\n"
2788          << "    const uint firstPixel    = gl_LocalInvocationIndex * pixelsPerInvocation;\n"
2789          << "    const float pixelWidth   = 2.0 / float(" << params->width << ");\n"
2790          << "    const float quarterWidth = pixelWidth / 4.0;\n"
2791          << "\n"
2792          << "    for (uint pixelIdx = 0u; pixelIdx < pixelsPerInvocation; ++pixelIdx)\n"
2793          << "    {\n"
2794          << "        const uint pixelId      = firstPixel + pixelIdx;\n"
2795          << "        const float pixelCenter = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
2796          << "        const float left        = pixelCenter - quarterWidth;\n"
2797          << "        const float right       = pixelCenter + quarterWidth;\n"
2798          << "\n"
2799          << "        const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation + pixelIdx * "
2800             "verticesPerPixel;\n"
2801          << "        gl_MeshVerticesNV[firstVertex + 0].gl_Position = vec4(left,  -1.0, 0.0f, 1.0f);\n"
2802          << "        gl_MeshVerticesNV[firstVertex + 1].gl_Position = vec4(left,   1.0, 0.0f, 1.0f);\n"
2803          << "        gl_MeshVerticesNV[firstVertex + 2].gl_Position = vec4(right, -1.0, 0.0f, 1.0f);\n"
2804          << "        gl_MeshVerticesNV[firstVertex + 3].gl_Position = vec4(right,  1.0, 0.0f, 1.0f);\n"
2805          << "\n"
2806          << "        const uint firstPrimitive = gl_LocalInvocationIndex * primitivesPerInvocation + pixelIdx * "
2807             "primitivesPerPixel;\n"
2808          << "        triangleColor[firstPrimitive + 0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
2809          << "        triangleColor[firstPrimitive + 1] = vec4(0.0, 0.0, 1.0, 1.0);\n"
2810          << "\n"
2811          << "        const uint firstIndex = gl_LocalInvocationIndex * indicesPerInvocation + pixelIdx * "
2812             "indicesPerPixel;\n"
2813          << "        gl_PrimitiveIndicesNV[firstIndex + 0] = firstVertex + 0;\n"
2814          << "        gl_PrimitiveIndicesNV[firstIndex + 1] = firstVertex + 1;\n"
2815          << "        gl_PrimitiveIndicesNV[firstIndex + 2] = firstVertex + 2;\n"
2816          << "        gl_PrimitiveIndicesNV[firstIndex + 3] = firstVertex + 1;\n"
2817          << "        gl_PrimitiveIndicesNV[firstIndex + 4] = firstVertex + 3;\n"
2818          << "        gl_PrimitiveIndicesNV[firstIndex + 5] = firstVertex + 2;\n"
2819          << "    }\n"
2820          << "}\n";
2821     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2822 }
2823 
generateReferenceLevel()2824 void MaximizeVerticesInstance::generateReferenceLevel()
2825 {
2826     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2827 }
2828 
2829 // Focus on the number of invocations.
2830 class MaximizeInvocationsCase : public MeshShaderMiscCase
2831 {
2832 public:
MaximizeInvocationsCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2833     MaximizeInvocationsCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2834         : MeshShaderMiscCase(testCtx, name, std::move(params))
2835     {
2836         const auto mtParams = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2837         DE_ASSERT(mtParams);
2838         DE_UNREF(mtParams); // For release builds.
2839     }
2840 
2841     void initPrograms(vk::SourceCollections &programCollection) const override;
2842     void checkSupport(Context &context) const override;
2843     TestInstance *createInstance(Context &context) const override;
2844 };
2845 
2846 class MaximizeInvocationsInstance : public MeshShaderMiscInstance
2847 {
2848 public:
MaximizeInvocationsInstance(Context & context,const MiscTestParams * params)2849     MaximizeInvocationsInstance(Context &context, const MiscTestParams *params)
2850         : MeshShaderMiscInstance(context, params)
2851     {
2852     }
2853 
2854     void generateReferenceLevel() override;
2855 };
2856 
createInstance(Context & context) const2857 TestInstance *MaximizeInvocationsCase::createInstance(Context &context) const
2858 {
2859     return new MaximizeInvocationsInstance(context, m_params.get());
2860 }
2861 
checkSupport(Context & context) const2862 void MaximizeInvocationsCase::checkSupport(Context &context) const
2863 {
2864     MeshShaderMiscCase::checkSupport(context);
2865 
2866     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2867     params->checkSupport(context);
2868 }
2869 
initPrograms(vk::SourceCollections & programCollection) const2870 void MaximizeInvocationsCase::initPrograms(vk::SourceCollections &programCollection) const
2871 {
2872     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2873 
2874     DE_ASSERT(!params->needsTaskShader());
2875     MeshShaderMiscCase::initPrograms(programCollection);
2876 
2877     // Idea behind the test: use two invocations to generate one point per framebuffer pixel.
2878     DE_ASSERT(params->localSize == params->width * 2u);
2879     DE_ASSERT(params->localSize == params->numPrimitives * 2u);
2880     DE_ASSERT(params->localSize == params->numVertices * 2u);
2881 
2882     std::ostringstream mesh;
2883     mesh << "#version 450\n"
2884          << "#extension GL_NV_mesh_shader : enable\n"
2885          << "\n"
2886          << "layout(local_size_x=" << params->localSize << ") in;\n"
2887          << "layout(points) out;\n"
2888          << "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2889          << "\n"
2890          << "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
2891          << "\n"
2892          << "void main ()\n"
2893          << "{\n"
2894          << "    gl_PrimitiveCountNV = " << params->numPrimitives << ";\n"
2895          << "    const uint pixelId = gl_LocalInvocationIndex / 2u;\n"
2896          << "    if (gl_LocalInvocationIndex % 2u == 0u)\n"
2897          << "    {\n"
2898          << "        const float xCoord = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
2899          << "        gl_MeshVerticesNV[pixelId].gl_Position = vec4(xCoord, 0.0, 0.0f, 1.0f);\n"
2900          << "        gl_MeshVerticesNV[pixelId].gl_PointSize = 1.0f;\n"
2901          << "    }\n"
2902          << "    else\n"
2903          << "    {\n"
2904          << "        gl_PrimitiveIndicesNV[pixelId] = pixelId;\n"
2905          << "        pointColor[pixelId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
2906          << "    }\n"
2907          << "}\n";
2908     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
2909 }
2910 
generateReferenceLevel()2911 void MaximizeInvocationsInstance::generateReferenceLevel()
2912 {
2913     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2914 }
2915 
2916 // Tests checking varied interfaces between task, mesh and frag.
2917 
// Whether an interface variable is attached to vertices or to primitives.
enum class Owner
{
    VERTEX    = 0,
    PRIMITIVE = 1,
};
2923 
// Base numeric category of an interface variable.
enum class DataType
{
    INTEGER = 0,
    FLOAT   = 1,
};
2929 
// Bit width of each component; the enumerator value is the width itself, in bits.
// Note: 8-bit variables not available for Input/Output.
enum class BitWidth
{
    B64 = 64,
    B32 = 32,
    B16 = 16,
};
2937 
// Number of components of a variable; the enumerator value is the component count.
enum class DataDim
{
    SCALAR = 1,
    VEC2   = 2,
    VEC3   = 3,
    VEC4   = 4,
};
2945 
// Interpolation mode of a variable: default (interpolated) or flat.
enum class Interpolation
{
    NORMAL = 0,
    FLAT   = 1,
};
2951 
// Direction of an interface variable declaration: shader input or shader output.
enum class Direction
{
    IN  = 0,
    OUT = 1,
};
2957 
2958 // Interface variable.
2959 struct IfaceVar
2960 {
2961     static constexpr uint32_t kNumVertices   = 4u;
2962     static constexpr uint32_t kNumPrimitives = 2u;
2963     static constexpr uint32_t kVarsPerType   = 2u;
2964 
IfaceVarvkt::MeshShader::__anon5941fe3f0111::IfaceVar2965     IfaceVar(Owner owner_, DataType dataType_, BitWidth bitWidth_, DataDim dataDim_, Interpolation interpolation_,
2966              uint32_t index_)
2967         : owner(owner_)
2968         , dataType(dataType_)
2969         , bitWidth(bitWidth_)
2970         , dataDim(dataDim_)
2971         , interpolation(interpolation_)
2972         , index(index_)
2973     {
2974         DE_ASSERT(!(dataType == DataType::INTEGER && interpolation == Interpolation::NORMAL));
2975         DE_ASSERT(!(owner == Owner::PRIMITIVE && interpolation == Interpolation::NORMAL));
2976         DE_ASSERT(
2977             !(dataType == DataType::FLOAT && bitWidth == BitWidth::B64 && interpolation == Interpolation::NORMAL));
2978         DE_ASSERT(index < kVarsPerType);
2979     }
2980 
2981     // This constructor needs to be defined for the code to compile, but it should never be actually called.
2982     // To make sure it's not used, the index is defined to be very large, which should trigger the assertion in getName() below.
IfaceVarvkt::MeshShader::__anon5941fe3f0111::IfaceVar2983     IfaceVar()
2984         : owner(Owner::VERTEX)
2985         , dataType(DataType::FLOAT)
2986         , bitWidth(BitWidth::B32)
2987         , dataDim(DataDim::VEC4)
2988         , interpolation(Interpolation::NORMAL)
2989         , index(std::numeric_limits<uint32_t>::max())
2990     {
2991     }
2992 
2993     Owner owner;
2994     DataType dataType;
2995     BitWidth bitWidth;
2996     DataDim dataDim;
2997     Interpolation interpolation;
2998     uint32_t index; // In case there are several variables matching this type.
2999 
3000     // The variable name will be unique and depend on its type.
getNamevkt::MeshShader::__anon5941fe3f0111::IfaceVar3001     std::string getName() const
3002     {
3003         DE_ASSERT(index < kVarsPerType);
3004 
3005         std::ostringstream name;
3006         name << ((owner == Owner::VERTEX) ? "vert" : "prim") << "_" << ((dataType == DataType::INTEGER) ? "i" : "f")
3007              << static_cast<int>(bitWidth) << "d" << static_cast<int>(dataDim) << "_"
3008              << ((interpolation == Interpolation::NORMAL) ? "inter" : "flat") << "_" << index;
3009         return name.str();
3010     }
3011 
3012     // Get location size according to the type.
getLocationSizevkt::MeshShader::__anon5941fe3f0111::IfaceVar3013     uint32_t getLocationSize() const
3014     {
3015         return ((bitWidth == BitWidth::B64 && dataDim >= DataDim::VEC3) ? 2u : 1u);
3016     }
3017 
3018     // Get the variable type in GLSL.
getGLSLTypevkt::MeshShader::__anon5941fe3f0111::IfaceVar3019     std::string getGLSLType() const
3020     {
3021         const auto widthStr     = std::to_string(static_cast<int>(bitWidth));
3022         const auto dimStr       = std::to_string(static_cast<int>(dataDim));
3023         const auto shortTypeStr = ((dataType == DataType::INTEGER) ? "i" : "f");
3024         const auto typeStr      = ((dataType == DataType::INTEGER) ? "int" : "float");
3025 
3026         if (dataDim == DataDim::SCALAR)
3027             return typeStr + widthStr + "_t";            // e.g. int32_t or float16_t
3028         return shortTypeStr + widthStr + "vec" + dimStr; // e.g. i16vec2 or f64vec4.
3029     }
3030 
3031     // Get a simple declaration of type and name. This can be reused for several things.
getTypeAndNamevkt::MeshShader::__anon5941fe3f0111::IfaceVar3032     std::string getTypeAndName() const
3033     {
3034         return getGLSLType() + " " + getName();
3035     }
3036 
getTypeAndNameDeclvkt::MeshShader::__anon5941fe3f0111::IfaceVar3037     std::string getTypeAndNameDecl(bool arrayDecl = false) const
3038     {
3039         std::ostringstream decl;
3040         decl << "    " << getTypeAndName();
3041         if (arrayDecl)
3042             decl << "[" << ((owner == Owner::PRIMITIVE) ? IfaceVar::kNumPrimitives : IfaceVar::kNumVertices) << "]";
3043         decl << ";\n";
3044         return decl.str();
3045     }
3046 
3047     // Variable declaration statement given its location and direction.
getLocationDeclvkt::MeshShader::__anon5941fe3f0111::IfaceVar3048     std::string getLocationDecl(size_t location, Direction direction) const
3049     {
3050         std::ostringstream decl;
3051         decl << "layout (location=" << location << ") " << ((direction == Direction::IN) ? "in" : "out") << " "
3052              << ((owner == Owner::PRIMITIVE) ? "perprimitiveNV " : "")
3053              << ((interpolation == Interpolation::FLAT) ? "flat " : "") << getTypeAndName()
3054              << ((direction == Direction::OUT) ? "[]" : "") << ";\n";
3055         return decl.str();
3056     }
3057 
3058     // Get the name of the source data for this variable. Tests will use a storage buffer for the per-vertex data and a uniform
3059     // buffer for the per-primitive data. The names in those will match.
getDataSourceNamevkt::MeshShader::__anon5941fe3f0111::IfaceVar3060     std::string getDataSourceName() const
3061     {
3062         // per-primitive data or per-vertex data buffers.
3063         return ((owner == Owner::PRIMITIVE) ? "ppd" : "pvd") + ("." + getName());
3064     }
3065 
3066     // Get the boolean check variable name (see below).
getCheckNamevkt::MeshShader::__anon5941fe3f0111::IfaceVar3067     std::string getCheckName() const
3068     {
3069         return "good_" + getName();
3070     }
3071 
3072     // Get the check statement that would be used in the fragment shader.
getCheckStatementvkt::MeshShader::__anon5941fe3f0111::IfaceVar3073     std::string getCheckStatement() const
3074     {
3075         std::ostringstream check;
3076         const auto sourceName = getDataSourceName();
3077         const auto glslType   = getGLSLType();
3078         const auto name       = getName();
3079 
3080         check << "    bool " << getCheckName() << " = ";
3081         if (owner == Owner::VERTEX)
3082         {
3083             // There will be 4 values in the buffers.
3084             std::ostringstream maxElem;
3085             std::ostringstream minElem;
3086 
3087             maxElem << glslType << "(max(max(max(" << sourceName << "[0], " << sourceName << "[1]), " << sourceName
3088                     << "[2]), " << sourceName << "[3]))";
3089             minElem << glslType << "(min(min(min(" << sourceName << "[0], " << sourceName << "[1]), " << sourceName
3090                     << "[2]), " << sourceName << "[3]))";
3091 
3092             if (dataDim == DataDim::SCALAR)
3093             {
3094                 check << "(" << name << " <= " << maxElem.str() << ") && (" << name << " >= " << minElem.str() << ")";
3095             }
3096             else
3097             {
3098                 check << "all(lessThanEqual(" << name << ", " << maxElem.str() << ")) && "
3099                       << "all(greaterThanEqual(" << name << ", " << minElem.str() << "))";
3100             }
3101         }
3102         else if (owner == Owner::PRIMITIVE)
3103         {
3104             // There will be 2 values in the buffers.
3105             check << "((gl_PrimitiveID == 0 || gl_PrimitiveID == 1) && ("
3106                   << "(gl_PrimitiveID == 0 && " << name << " == " << sourceName << "[0]) || "
3107                   << "(gl_PrimitiveID == 1 && " << name << " == " << sourceName << "[1])))";
3108         }
3109         check << ";\n";
3110 
3111         return check.str();
3112     }
3113 
3114     // Get an assignment statement for an out variable.
getAssignmentStatementvkt::MeshShader::__anon5941fe3f0111::IfaceVar3115     std::string getAssignmentStatement(size_t arrayIndex, const std::string &leftPrefix,
3116                                        const std::string &rightPrefix) const
3117     {
3118         const auto name    = getName();
3119         const auto typeStr = getGLSLType();
3120         std::ostringstream stmt;
3121 
3122         stmt << "    " << leftPrefix << (leftPrefix.empty() ? "" : ".") << name << "[" << arrayIndex
3123              << "] = " << typeStr << "(" << rightPrefix << (rightPrefix.empty() ? "" : ".") << name << "[" << arrayIndex
3124              << "]);\n";
3125         return stmt.str();
3126     }
3127 
3128     // Get the corresponding array size based on the owner (vertex or primitive)
getArraySizevkt::MeshShader::__anon5941fe3f0111::IfaceVar3129     uint32_t getArraySize() const
3130     {
3131         return ((owner == Owner::PRIMITIVE) ? IfaceVar::kNumPrimitives : IfaceVar::kNumVertices);
3132     }
3133 };
3134 
3135 using IfaceVarVec    = std::vector<IfaceVar>;
3136 using IfaceVarVecPtr = std::unique_ptr<IfaceVarVec>;
3137 
3138 struct InterfaceVariableParams : public MiscTestParams
3139 {
InterfaceVariableParamsvkt::MeshShader::__anon5941fe3f0111::InterfaceVariableParams3140     InterfaceVariableParams(const tcu::Maybe<uint32_t> &taskCount_, uint32_t meshCount_, uint32_t width_,
3141                             uint32_t height_, bool useInt64_, bool useFloat64_, bool useInt16_, bool useFloat16_,
3142                             IfaceVarVecPtr vars_)
3143         : MiscTestParams(taskCount_, meshCount_, width_, height_)
3144         , useInt64(useInt64_)
3145         , useFloat64(useFloat64_)
3146         , useInt16(useInt16_)
3147         , useFloat16(useFloat16_)
3148         , ifaceVars(std::move(vars_))
3149     {
3150     }
3151 
3152     // These need to match the list of interface variables.
3153     bool useInt64;
3154     bool useFloat64;
3155     bool useInt16;
3156     bool useFloat16;
3157 
3158     IfaceVarVecPtr ifaceVars;
3159 };
3160 
3161 class InterfaceVariablesCase : public MeshShaderMiscCase
3162 {
3163 public:
InterfaceVariablesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)3164     InterfaceVariablesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
3165         : MeshShaderMiscCase(testCtx, name, std::move(params))
3166     {
3167     }
~InterfaceVariablesCase(void)3168     virtual ~InterfaceVariablesCase(void)
3169     {
3170     }
3171 
3172     TestInstance *createInstance(Context &context) const override;
3173     void checkSupport(Context &context) const override;
3174     void initPrograms(vk::SourceCollections &programCollection) const override;
3175 
3176     // Note data types in the input buffers are always plain floats or ints. They will be converted to the appropriate type when
3177     // copying them in or out of output variables. Note we have two variables per type, as per IfaceVar::kVarsPerType.
3178 
3179     struct PerVertexData
3180     {
3181         // Interpolated floats.
3182 
3183         tcu::Vec4 vert_f64d4_inter_0[IfaceVar::kNumVertices];
3184         tcu::Vec4 vert_f64d4_inter_1[IfaceVar::kNumVertices];
3185 
3186         tcu::Vec3 vert_f64d3_inter_0[IfaceVar::kNumVertices];
3187         tcu::Vec3 vert_f64d3_inter_1[IfaceVar::kNumVertices];
3188 
3189         tcu::Vec2 vert_f64d2_inter_0[IfaceVar::kNumVertices];
3190         tcu::Vec2 vert_f64d2_inter_1[IfaceVar::kNumVertices];
3191 
3192         float vert_f64d1_inter_0[IfaceVar::kNumVertices];
3193         float vert_f64d1_inter_1[IfaceVar::kNumVertices];
3194 
3195         tcu::Vec4 vert_f32d4_inter_0[IfaceVar::kNumVertices];
3196         tcu::Vec4 vert_f32d4_inter_1[IfaceVar::kNumVertices];
3197 
3198         tcu::Vec3 vert_f32d3_inter_0[IfaceVar::kNumVertices];
3199         tcu::Vec3 vert_f32d3_inter_1[IfaceVar::kNumVertices];
3200 
3201         tcu::Vec2 vert_f32d2_inter_0[IfaceVar::kNumVertices];
3202         tcu::Vec2 vert_f32d2_inter_1[IfaceVar::kNumVertices];
3203 
3204         float vert_f32d1_inter_0[IfaceVar::kNumVertices];
3205         float vert_f32d1_inter_1[IfaceVar::kNumVertices];
3206 
3207         tcu::Vec4 vert_f16d4_inter_0[IfaceVar::kNumVertices];
3208         tcu::Vec4 vert_f16d4_inter_1[IfaceVar::kNumVertices];
3209 
3210         tcu::Vec3 vert_f16d3_inter_0[IfaceVar::kNumVertices];
3211         tcu::Vec3 vert_f16d3_inter_1[IfaceVar::kNumVertices];
3212 
3213         tcu::Vec2 vert_f16d2_inter_0[IfaceVar::kNumVertices];
3214         tcu::Vec2 vert_f16d2_inter_1[IfaceVar::kNumVertices];
3215 
3216         float vert_f16d1_inter_0[IfaceVar::kNumVertices];
3217         float vert_f16d1_inter_1[IfaceVar::kNumVertices];
3218 
3219         // Flat floats.
3220 
3221         tcu::Vec4 vert_f64d4_flat_0[IfaceVar::kNumVertices];
3222         tcu::Vec4 vert_f64d4_flat_1[IfaceVar::kNumVertices];
3223 
3224         tcu::Vec3 vert_f64d3_flat_0[IfaceVar::kNumVertices];
3225         tcu::Vec3 vert_f64d3_flat_1[IfaceVar::kNumVertices];
3226 
3227         tcu::Vec2 vert_f64d2_flat_0[IfaceVar::kNumVertices];
3228         tcu::Vec2 vert_f64d2_flat_1[IfaceVar::kNumVertices];
3229 
3230         float vert_f64d1_flat_0[IfaceVar::kNumVertices];
3231         float vert_f64d1_flat_1[IfaceVar::kNumVertices];
3232 
3233         tcu::Vec4 vert_f32d4_flat_0[IfaceVar::kNumVertices];
3234         tcu::Vec4 vert_f32d4_flat_1[IfaceVar::kNumVertices];
3235 
3236         tcu::Vec3 vert_f32d3_flat_0[IfaceVar::kNumVertices];
3237         tcu::Vec3 vert_f32d3_flat_1[IfaceVar::kNumVertices];
3238 
3239         tcu::Vec2 vert_f32d2_flat_0[IfaceVar::kNumVertices];
3240         tcu::Vec2 vert_f32d2_flat_1[IfaceVar::kNumVertices];
3241 
3242         float vert_f32d1_flat_0[IfaceVar::kNumVertices];
3243         float vert_f32d1_flat_1[IfaceVar::kNumVertices];
3244 
3245         tcu::Vec4 vert_f16d4_flat_0[IfaceVar::kNumVertices];
3246         tcu::Vec4 vert_f16d4_flat_1[IfaceVar::kNumVertices];
3247 
3248         tcu::Vec3 vert_f16d3_flat_0[IfaceVar::kNumVertices];
3249         tcu::Vec3 vert_f16d3_flat_1[IfaceVar::kNumVertices];
3250 
3251         tcu::Vec2 vert_f16d2_flat_0[IfaceVar::kNumVertices];
3252         tcu::Vec2 vert_f16d2_flat_1[IfaceVar::kNumVertices];
3253 
3254         float vert_f16d1_flat_0[IfaceVar::kNumVertices];
3255         float vert_f16d1_flat_1[IfaceVar::kNumVertices];
3256 
3257         // Flat ints.
3258 
3259         tcu::IVec4 vert_i64d4_flat_0[IfaceVar::kNumVertices];
3260         tcu::IVec4 vert_i64d4_flat_1[IfaceVar::kNumVertices];
3261 
3262         tcu::IVec3 vert_i64d3_flat_0[IfaceVar::kNumVertices];
3263         tcu::IVec3 vert_i64d3_flat_1[IfaceVar::kNumVertices];
3264 
3265         tcu::IVec2 vert_i64d2_flat_0[IfaceVar::kNumVertices];
3266         tcu::IVec2 vert_i64d2_flat_1[IfaceVar::kNumVertices];
3267 
3268         int32_t vert_i64d1_flat_0[IfaceVar::kNumVertices];
3269         int32_t vert_i64d1_flat_1[IfaceVar::kNumVertices];
3270 
3271         tcu::IVec4 vert_i32d4_flat_0[IfaceVar::kNumVertices];
3272         tcu::IVec4 vert_i32d4_flat_1[IfaceVar::kNumVertices];
3273 
3274         tcu::IVec3 vert_i32d3_flat_0[IfaceVar::kNumVertices];
3275         tcu::IVec3 vert_i32d3_flat_1[IfaceVar::kNumVertices];
3276 
3277         tcu::IVec2 vert_i32d2_flat_0[IfaceVar::kNumVertices];
3278         tcu::IVec2 vert_i32d2_flat_1[IfaceVar::kNumVertices];
3279 
3280         int32_t vert_i32d1_flat_0[IfaceVar::kNumVertices];
3281         int32_t vert_i32d1_flat_1[IfaceVar::kNumVertices];
3282 
3283         tcu::IVec4 vert_i16d4_flat_0[IfaceVar::kNumVertices];
3284         tcu::IVec4 vert_i16d4_flat_1[IfaceVar::kNumVertices];
3285 
3286         tcu::IVec3 vert_i16d3_flat_0[IfaceVar::kNumVertices];
3287         tcu::IVec3 vert_i16d3_flat_1[IfaceVar::kNumVertices];
3288 
3289         tcu::IVec2 vert_i16d2_flat_0[IfaceVar::kNumVertices];
3290         tcu::IVec2 vert_i16d2_flat_1[IfaceVar::kNumVertices];
3291 
3292         int32_t vert_i16d1_flat_0[IfaceVar::kNumVertices];
3293         int32_t vert_i16d1_flat_1[IfaceVar::kNumVertices];
3294     };
3295 
3296     struct PerPrimitiveData
3297     {
3298         // Flat floats.
3299 
3300         tcu::Vec4 prim_f64d4_flat_0[IfaceVar::kNumPrimitives];
3301         tcu::Vec4 prim_f64d4_flat_1[IfaceVar::kNumPrimitives];
3302 
3303         tcu::Vec3 prim_f64d3_flat_0[IfaceVar::kNumPrimitives];
3304         tcu::Vec3 prim_f64d3_flat_1[IfaceVar::kNumPrimitives];
3305 
3306         tcu::Vec2 prim_f64d2_flat_0[IfaceVar::kNumPrimitives];
3307         tcu::Vec2 prim_f64d2_flat_1[IfaceVar::kNumPrimitives];
3308 
3309         float prim_f64d1_flat_0[IfaceVar::kNumPrimitives];
3310         float prim_f64d1_flat_1[IfaceVar::kNumPrimitives];
3311 
3312         tcu::Vec4 prim_f32d4_flat_0[IfaceVar::kNumPrimitives];
3313         tcu::Vec4 prim_f32d4_flat_1[IfaceVar::kNumPrimitives];
3314 
3315         tcu::Vec3 prim_f32d3_flat_0[IfaceVar::kNumPrimitives];
3316         tcu::Vec3 prim_f32d3_flat_1[IfaceVar::kNumPrimitives];
3317 
3318         tcu::Vec2 prim_f32d2_flat_0[IfaceVar::kNumPrimitives];
3319         tcu::Vec2 prim_f32d2_flat_1[IfaceVar::kNumPrimitives];
3320 
3321         float prim_f32d1_flat_0[IfaceVar::kNumPrimitives];
3322         float prim_f32d1_flat_1[IfaceVar::kNumPrimitives];
3323 
3324         tcu::Vec4 prim_f16d4_flat_0[IfaceVar::kNumPrimitives];
3325         tcu::Vec4 prim_f16d4_flat_1[IfaceVar::kNumPrimitives];
3326 
3327         tcu::Vec3 prim_f16d3_flat_0[IfaceVar::kNumPrimitives];
3328         tcu::Vec3 prim_f16d3_flat_1[IfaceVar::kNumPrimitives];
3329 
3330         tcu::Vec2 prim_f16d2_flat_0[IfaceVar::kNumPrimitives];
3331         tcu::Vec2 prim_f16d2_flat_1[IfaceVar::kNumPrimitives];
3332 
3333         float prim_f16d1_flat_0[IfaceVar::kNumPrimitives];
3334         float prim_f16d1_flat_1[IfaceVar::kNumPrimitives];
3335 
3336         // Flat ints.
3337 
3338         tcu::IVec4 prim_i64d4_flat_0[IfaceVar::kNumPrimitives];
3339         tcu::IVec4 prim_i64d4_flat_1[IfaceVar::kNumPrimitives];
3340 
3341         tcu::IVec3 prim_i64d3_flat_0[IfaceVar::kNumPrimitives];
3342         tcu::IVec3 prim_i64d3_flat_1[IfaceVar::kNumPrimitives];
3343 
3344         tcu::IVec2 prim_i64d2_flat_0[IfaceVar::kNumPrimitives];
3345         tcu::IVec2 prim_i64d2_flat_1[IfaceVar::kNumPrimitives];
3346 
3347         int32_t prim_i64d1_flat_0[IfaceVar::kNumPrimitives];
3348         int32_t prim_i64d1_flat_1[IfaceVar::kNumPrimitives];
3349 
3350         tcu::IVec4 prim_i32d4_flat_0[IfaceVar::kNumPrimitives];
3351         tcu::IVec4 prim_i32d4_flat_1[IfaceVar::kNumPrimitives];
3352 
3353         tcu::IVec3 prim_i32d3_flat_0[IfaceVar::kNumPrimitives];
3354         tcu::IVec3 prim_i32d3_flat_1[IfaceVar::kNumPrimitives];
3355 
3356         tcu::IVec2 prim_i32d2_flat_0[IfaceVar::kNumPrimitives];
3357         tcu::IVec2 prim_i32d2_flat_1[IfaceVar::kNumPrimitives];
3358 
3359         int32_t prim_i32d1_flat_0[IfaceVar::kNumPrimitives];
3360         int32_t prim_i32d1_flat_1[IfaceVar::kNumPrimitives];
3361 
3362         tcu::IVec4 prim_i16d4_flat_0[IfaceVar::kNumPrimitives];
3363         tcu::IVec4 prim_i16d4_flat_1[IfaceVar::kNumPrimitives];
3364 
3365         tcu::IVec3 prim_i16d3_flat_0[IfaceVar::kNumPrimitives];
3366         tcu::IVec3 prim_i16d3_flat_1[IfaceVar::kNumPrimitives];
3367 
3368         tcu::IVec2 prim_i16d2_flat_0[IfaceVar::kNumPrimitives];
3369         tcu::IVec2 prim_i16d2_flat_1[IfaceVar::kNumPrimitives];
3370 
3371         int32_t prim_i16d1_flat_0[IfaceVar::kNumPrimitives];
3372         int32_t prim_i16d1_flat_1[IfaceVar::kNumPrimitives];
3373     };
3374 
3375     static constexpr uint32_t kGlslangBuiltInCount = 11u;
3376     static constexpr uint32_t kMaxLocations        = 16u;
3377 };
3378 
3379 class InterfaceVariablesInstance : public MeshShaderMiscInstance
3380 {
3381 public:
InterfaceVariablesInstance(Context & context,const MiscTestParams * params)3382     InterfaceVariablesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
3383     {
3384     }
~InterfaceVariablesInstance(void)3385     virtual ~InterfaceVariablesInstance(void)
3386     {
3387     }
3388 
3389     void generateReferenceLevel() override;
3390     tcu::TestStatus iterate(void) override;
3391 };
3392 
createInstance(Context & context) const3393 TestInstance *InterfaceVariablesCase::createInstance(Context &context) const
3394 {
3395     return new InterfaceVariablesInstance(context, m_params.get());
3396 }
3397 
checkSupport(Context & context) const3398 void InterfaceVariablesCase::checkSupport(Context &context) const
3399 {
3400     const auto params = dynamic_cast<InterfaceVariableParams *>(m_params.get());
3401     DE_ASSERT(params);
3402 
3403     MeshShaderMiscCase::checkSupport(context);
3404 
3405     if (params->useFloat64)
3406         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_FLOAT64);
3407 
3408     if (params->useInt64)
3409         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_INT64);
3410 
3411     if (params->useInt16)
3412         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_INT16);
3413 
3414     if (params->useFloat16)
3415     {
3416         const auto &features = context.getShaderFloat16Int8Features();
3417         if (!features.shaderFloat16)
3418             TCU_THROW(NotSupportedError, "shaderFloat16 feature not supported");
3419     }
3420 
3421     if (params->useInt16 || params->useFloat16)
3422     {
3423         const auto &features = context.get16BitStorageFeatures();
3424         if (!features.storageInputOutput16)
3425             TCU_THROW(NotSupportedError, "storageInputOutput16 feature not supported");
3426     }
3427 
3428     // glslang will use several built-ins in the generated mesh code, which count against the location and component limits.
3429     {
3430         const auto neededComponents = (kGlslangBuiltInCount + kMaxLocations) * 4u;
3431         const auto &properties      = context.getDeviceProperties();
3432 
3433         if (neededComponents > properties.limits.maxFragmentInputComponents)
3434             TCU_THROW(NotSupportedError, "maxFragmentInputComponents too low to run this test");
3435     }
3436 }
3437 
initPrograms(vk::SourceCollections & programCollection) const3438 void InterfaceVariablesCase::initPrograms(vk::SourceCollections &programCollection) const
3439 {
3440     // Bindings needs to match the PerVertexData and PerPrimitiveData structures.
3441     std::ostringstream bindings;
3442     bindings << "layout(set=0, binding=0, std430) readonly buffer PerVertexBlock {\n"
3443              << "    vec4   vert_f64d4_inter_0[" << IfaceVar::kNumVertices << "];\n"
3444              << "    vec4   vert_f64d4_inter_1[" << IfaceVar::kNumVertices << "];\n"
3445              << "    vec3   vert_f64d3_inter_0[" << IfaceVar::kNumVertices << "];\n"
3446              << "    vec3   vert_f64d3_inter_1[" << IfaceVar::kNumVertices << "];\n"
3447              << "    vec2   vert_f64d2_inter_0[" << IfaceVar::kNumVertices << "];\n"
3448              << "    vec2   vert_f64d2_inter_1[" << IfaceVar::kNumVertices << "];\n"
3449              << "    float  vert_f64d1_inter_0[" << IfaceVar::kNumVertices << "];\n"
3450              << "    float  vert_f64d1_inter_1[" << IfaceVar::kNumVertices << "];\n"
3451              << "    vec4   vert_f32d4_inter_0[" << IfaceVar::kNumVertices << "];\n"
3452              << "    vec4   vert_f32d4_inter_1[" << IfaceVar::kNumVertices << "];\n"
3453              << "    vec3   vert_f32d3_inter_0[" << IfaceVar::kNumVertices << "];\n"
3454              << "    vec3   vert_f32d3_inter_1[" << IfaceVar::kNumVertices << "];\n"
3455              << "    vec2   vert_f32d2_inter_0[" << IfaceVar::kNumVertices << "];\n"
3456              << "    vec2   vert_f32d2_inter_1[" << IfaceVar::kNumVertices << "];\n"
3457              << "    float  vert_f32d1_inter_0[" << IfaceVar::kNumVertices << "];\n"
3458              << "    float  vert_f32d1_inter_1[" << IfaceVar::kNumVertices << "];\n"
3459              << "    vec4   vert_f16d4_inter_0[" << IfaceVar::kNumVertices << "];\n"
3460              << "    vec4   vert_f16d4_inter_1[" << IfaceVar::kNumVertices << "];\n"
3461              << "    vec3   vert_f16d3_inter_0[" << IfaceVar::kNumVertices << "];\n"
3462              << "    vec3   vert_f16d3_inter_1[" << IfaceVar::kNumVertices << "];\n"
3463              << "    vec2   vert_f16d2_inter_0[" << IfaceVar::kNumVertices << "];\n"
3464              << "    vec2   vert_f16d2_inter_1[" << IfaceVar::kNumVertices << "];\n"
3465              << "    float  vert_f16d1_inter_0[" << IfaceVar::kNumVertices << "];\n"
3466              << "    float  vert_f16d1_inter_1[" << IfaceVar::kNumVertices << "];\n"
3467              << "    vec4   vert_f64d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3468              << "    vec4   vert_f64d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3469              << "    vec3   vert_f64d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3470              << "    vec3   vert_f64d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3471              << "    vec2   vert_f64d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3472              << "    vec2   vert_f64d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3473              << "    float  vert_f64d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3474              << "    float  vert_f64d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3475              << "    vec4   vert_f32d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3476              << "    vec4   vert_f32d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3477              << "    vec3   vert_f32d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3478              << "    vec3   vert_f32d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3479              << "    vec2   vert_f32d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3480              << "    vec2   vert_f32d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3481              << "    float  vert_f32d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3482              << "    float  vert_f32d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3483              << "    vec4   vert_f16d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3484              << "    vec4   vert_f16d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3485              << "    vec3   vert_f16d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3486              << "    vec3   vert_f16d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3487              << "    vec2   vert_f16d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3488              << "    vec2   vert_f16d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3489              << "    float  vert_f16d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3490              << "    float  vert_f16d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3491              << "    ivec4  vert_i64d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3492              << "    ivec4  vert_i64d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3493              << "    ivec3  vert_i64d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3494              << "    ivec3  vert_i64d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3495              << "    ivec2  vert_i64d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3496              << "    ivec2  vert_i64d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3497              << "    int    vert_i64d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3498              << "    int    vert_i64d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3499              << "    ivec4  vert_i32d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3500              << "    ivec4  vert_i32d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3501              << "    ivec3  vert_i32d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3502              << "    ivec3  vert_i32d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3503              << "    ivec2  vert_i32d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3504              << "    ivec2  vert_i32d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3505              << "    int    vert_i32d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3506              << "    int    vert_i32d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3507              << "    ivec4  vert_i16d4_flat_0[" << IfaceVar::kNumVertices << "];\n"
3508              << "    ivec4  vert_i16d4_flat_1[" << IfaceVar::kNumVertices << "];\n"
3509              << "    ivec3  vert_i16d3_flat_0[" << IfaceVar::kNumVertices << "];\n"
3510              << "    ivec3  vert_i16d3_flat_1[" << IfaceVar::kNumVertices << "];\n"
3511              << "    ivec2  vert_i16d2_flat_0[" << IfaceVar::kNumVertices << "];\n"
3512              << "    ivec2  vert_i16d2_flat_1[" << IfaceVar::kNumVertices << "];\n"
3513              << "    int    vert_i16d1_flat_0[" << IfaceVar::kNumVertices << "];\n"
3514              << "    int    vert_i16d1_flat_1[" << IfaceVar::kNumVertices << "];\n"
3515              << " } pvd;\n"
3516              << "\n"
3517              << "layout(set=0, binding=1, std430) readonly buffer PerPrimitiveBlock {\n"
3518              << "    vec4   prim_f64d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3519              << "    vec4   prim_f64d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3520              << "    vec3   prim_f64d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3521              << "    vec3   prim_f64d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3522              << "    vec2   prim_f64d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3523              << "    vec2   prim_f64d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3524              << "    float  prim_f64d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3525              << "    float  prim_f64d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3526              << "    vec4   prim_f32d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3527              << "    vec4   prim_f32d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3528              << "    vec3   prim_f32d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3529              << "    vec3   prim_f32d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3530              << "    vec2   prim_f32d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3531              << "    vec2   prim_f32d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3532              << "    float  prim_f32d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3533              << "    float  prim_f32d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3534              << "    vec4   prim_f16d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3535              << "    vec4   prim_f16d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3536              << "    vec3   prim_f16d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3537              << "    vec3   prim_f16d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3538              << "    vec2   prim_f16d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3539              << "    vec2   prim_f16d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3540              << "    float  prim_f16d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3541              << "    float  prim_f16d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3542              << "    ivec4  prim_i64d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3543              << "    ivec4  prim_i64d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3544              << "    ivec3  prim_i64d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3545              << "    ivec3  prim_i64d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3546              << "    ivec2  prim_i64d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3547              << "    ivec2  prim_i64d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3548              << "    int    prim_i64d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3549              << "    int    prim_i64d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3550              << "    ivec4  prim_i32d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3551              << "    ivec4  prim_i32d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3552              << "    ivec3  prim_i32d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3553              << "    ivec3  prim_i32d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3554              << "    ivec2  prim_i32d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3555              << "    ivec2  prim_i32d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3556              << "    int    prim_i32d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3557              << "    int    prim_i32d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3558              << "    ivec4  prim_i16d4_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3559              << "    ivec4  prim_i16d4_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3560              << "    ivec3  prim_i16d3_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3561              << "    ivec3  prim_i16d3_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3562              << "    ivec2  prim_i16d2_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3563              << "    ivec2  prim_i16d2_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3564              << "    int    prim_i16d1_flat_0[" << IfaceVar::kNumPrimitives << "];\n"
3565              << "    int    prim_i16d1_flat_1[" << IfaceVar::kNumPrimitives << "];\n"
3566              << " } ppd;\n"
3567              << "\n";
3568     const auto bindingsDecl = bindings.str();
3569 
3570     const auto params = dynamic_cast<InterfaceVariableParams *>(m_params.get());
3571     DE_ASSERT(params);
3572     const auto &varVec = *(params->ifaceVars);
3573 
3574     std::ostringstream frag;
3575     frag << "#version 450\n"
3576          << "#extension GL_NV_mesh_shader : enable\n"
3577          << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
3578          << "\n"
3579          << bindingsDecl;
3580 
3581     // Declare interface variables as Input in the fragment shader.
3582     {
3583         uint32_t usedLocations = 0u;
3584         for (const auto &var : varVec)
3585         {
3586             frag << var.getLocationDecl(usedLocations, Direction::IN);
3587             usedLocations += var.getLocationSize();
3588         }
3589     }
3590 
3591     frag << "\n"
3592          << "layout (location=0) out vec4 outColor;\n"
3593          << "\n"
3594          << "void main ()\n"
3595          << "{\n";
3596 
3597     // Emit checks for each variable value in the fragment shader.
3598     std::ostringstream allConditions;
3599 
3600     for (size_t i = 0; i < varVec.size(); ++i)
3601     {
3602         frag << varVec[i].getCheckStatement();
3603         allConditions << ((i == 0) ? "" : " && ") << varVec[i].getCheckName();
3604     }
3605 
3606     // Emit final check.
3607     frag << "    if (" << allConditions.str() << ") {\n"
3608          << "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
3609          << "    } else {\n"
3610          << "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
3611          << "    }\n"
3612          << "}\n";
3613     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
3614 
3615     std::ostringstream pvdDataDeclStream;
3616     pvdDataDeclStream << "    vec4 positions[4];\n"
3617                       << "    float pointSizes[4];\n"
3618                       << "    float clipDistances[4];\n"
3619                       << "    vec4 custom1[4];\n"
3620                       << "    float custom2[4];\n"
3621                       << "    int custom3[4];\n";
3622     const auto pvdDataDecl = pvdDataDeclStream.str();
3623 
3624     std::ostringstream ppdDataDeclStream;
3625     ppdDataDeclStream << "    int primitiveIds[2];\n"
3626                       << "    int viewportIndices[2];\n"
3627                       << "    uvec4 custom4[2];\n"
3628                       << "    float custom5[2];\n";
3629     const auto ppdDataDecl = ppdDataDeclStream.str();
3630 
3631     std::ostringstream taskDataStream;
3632     taskDataStream << "taskNV TaskData {\n";
3633     for (size_t i = 0; i < varVec.size(); ++i)
3634         taskDataStream << varVec[i].getTypeAndNameDecl(/*arrayDecl*/ true);
3635     taskDataStream << "} td;\n\n";
3636 
3637     const auto taskShader    = m_params->needsTaskShader();
3638     const auto taskDataDecl  = taskDataStream.str();
3639     const auto meshPvdPrefix = (taskShader ? "td" : "pvd");
3640     const auto meshPpdPrefix = (taskShader ? "td" : "ppd");
3641 
3642     std::ostringstream mesh;
3643     mesh << "#version 450\n"
3644          << "#extension GL_NV_mesh_shader : enable\n"
3645          << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
3646          << "\n"
3647          << "layout (local_size_x=1) in;\n"
3648          << "layout (max_primitives=" << IfaceVar::kNumPrimitives << ", max_vertices=" << IfaceVar::kNumVertices
3649          << ") out;\n"
3650          << "layout (triangles) out;\n"
3651          << "\n";
3652 
3653     // Declare interface variables as Output variables.
3654     {
3655         uint32_t usedLocations = 0u;
3656         for (const auto &var : varVec)
3657         {
3658             mesh << var.getLocationDecl(usedLocations, Direction::OUT);
3659             usedLocations += var.getLocationSize();
3660         }
3661     }
3662 
3663     mesh << "out gl_MeshPerVertexNV {\n"
3664          << "   vec4  gl_Position;\n"
3665          << "} gl_MeshVerticesNV[];\n"
3666          << "out perprimitiveNV gl_MeshPerPrimitiveNV {\n"
3667          << "  int gl_PrimitiveID;\n"
3668          << "} gl_MeshPrimitivesNV[];\n"
3669          << "\n"
3670          << (taskShader ? "in " + taskDataDecl : bindingsDecl) << "vec4 positions[" << IfaceVar::kNumVertices
3671          << "] = vec4[](\n"
3672          << "    vec4(-1.0, -1.0, 0.0, 1.0),\n"
3673          << "    vec4( 1.0, -1.0, 0.0, 1.0),\n"
3674          << "    vec4(-1.0,  1.0, 0.0, 1.0),\n"
3675          << "    vec4( 1.0,  1.0, 0.0, 1.0)\n"
3676          << ");\n"
3677          << "\n"
3678          << "int indices[" << (IfaceVar::kNumPrimitives * 3u) << "] = int[](\n"
3679          << "    0, 1, 2, 2, 3, 1\n"
3680          << ");\n"
3681          << "\n"
3682          << "void main ()\n"
3683          << "{\n"
3684          << "    gl_PrimitiveCountNV = " << IfaceVar::kNumPrimitives << ";\n"
3685          << "\n";
3686 
3687     // Emit positions, indices and primitive IDs.
3688     for (uint32_t i = 0; i < IfaceVar::kNumVertices; ++i)
3689         mesh << "    gl_MeshVerticesNV[" << i << "].gl_Position = positions[" << i << "];\n";
3690     mesh << "\n";
3691 
3692     for (uint32_t i = 0; i < IfaceVar::kNumPrimitives; ++i)
3693         for (uint32_t j = 0; j < 3u; ++j) // 3 vertices per triangle
3694         {
3695             const auto arrayPos = i * 3u + j;
3696             mesh << "    gl_PrimitiveIndicesNV[" << arrayPos << "] = indices[" << arrayPos << "];\n";
3697         }
3698     mesh << "\n";
3699 
3700     for (uint32_t i = 0; i < IfaceVar::kNumPrimitives; ++i)
3701         mesh << "    gl_MeshPrimitivesNV[" << i << "].gl_PrimitiveID = " << i << ";\n";
3702     mesh << "\n";
3703 
3704     // Copy data to output variables, either from the task data or the bindings.
3705     for (size_t i = 0; i < varVec.size(); ++i)
3706     {
3707         const auto arraySize = varVec[i].getArraySize();
3708         const auto prefix    = ((varVec[i].owner == Owner::VERTEX) ? meshPvdPrefix : meshPpdPrefix);
3709         for (uint32_t arrayIndex = 0u; arrayIndex < arraySize; ++arrayIndex)
3710             mesh << varVec[i].getAssignmentStatement(arrayIndex, "", prefix);
3711     }
3712 
3713     mesh << "\n"
3714          << "}\n";
3715 
3716     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
3717 
3718     // Task shader if needed.
3719     if (taskShader)
3720     {
3721         const auto &meshCount    = m_params->meshCount;
3722         const auto taskPvdPrefix = "pvd";
3723         const auto taskPpdPrefix = "ppd";
3724 
3725         std::ostringstream task;
3726         task << "#version 450\n"
3727              << "#extension GL_NV_mesh_shader : enable\n"
3728              << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
3729              << "\n"
3730              << "out " << taskDataDecl << bindingsDecl << "void main ()\n"
3731              << "{\n"
3732              << "    gl_TaskCountNV = " << meshCount << ";\n"
3733              << "\n";
3734 
3735         // Copy data from bindings to the task data structure.
3736         for (size_t i = 0; i < varVec.size(); ++i)
3737         {
3738             const auto arraySize = varVec[i].getArraySize();
3739             const auto prefix    = ((varVec[i].owner == Owner::VERTEX) ? taskPvdPrefix : taskPpdPrefix);
3740 
3741             for (uint32_t arrayIndex = 0u; arrayIndex < arraySize; ++arrayIndex)
3742                 task << varVec[i].getAssignmentStatement(arrayIndex, "td", prefix);
3743         }
3744 
3745         task << "}\n";
3746         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
3747     }
3748 }
3749 
generateReferenceLevel()3750 void InterfaceVariablesInstance::generateReferenceLevel()
3751 {
3752     const auto format    = getOutputFormat();
3753     const auto tcuFormat = mapVkFormat(format);
3754 
3755     const auto iWidth  = static_cast<int>(m_params->width);
3756     const auto iHeight = static_cast<int>(m_params->height);
3757 
3758     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
3759 
3760     const auto access    = m_referenceLevel->getAccess();
3761     const auto blueColor = tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
3762 
3763     tcu::clear(access, blueColor);
3764 }
3765 
iterate()3766 tcu::TestStatus InterfaceVariablesInstance::iterate()
3767 {
3768     const auto &vkd       = m_context.getDeviceInterface();
3769     const auto device     = m_context.getDevice();
3770     auto &alloc           = m_context.getDefaultAllocator();
3771     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
3772     const auto queue      = m_context.getUniversalQueue();
3773 
3774     const auto imageFormat = getOutputFormat();
3775     const auto tcuFormat   = mapVkFormat(imageFormat);
3776     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
3777     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3778 
3779     const auto &binaries = m_context.getBinaryCollection();
3780     const auto hasTask   = binaries.contains("task");
3781     const auto bufStages =
3782         (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_MESH_BIT_NV | (hasTask ? VK_SHADER_STAGE_TASK_BIT_NV : 0));
3783 
3784     const VkImageCreateInfo colorBufferInfo = {
3785         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
3786         nullptr,                             // const void* pNext;
3787         0u,                                  // VkImageCreateFlags flags;
3788         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
3789         imageFormat,                         // VkFormat format;
3790         imageExtent,                         // VkExtent3D extent;
3791         1u,                                  // uint32_t mipLevels;
3792         1u,                                  // uint32_t arrayLayers;
3793         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
3794         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
3795         imageUsage,                          // VkImageUsageFlags usage;
3796         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
3797         0u,                                  // uint32_t queueFamilyIndexCount;
3798         nullptr,                             // const uint32_t* pQueueFamilyIndices;
3799         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
3800     };
3801 
3802     // Create color image and view.
3803     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
3804     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
3805     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3806     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
3807 
3808     // Create a memory buffer for verification.
3809     const auto verificationBufferSize =
3810         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
3811     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3812     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
3813 
3814     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
3815     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
3816     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
3817 
3818     // Bindings data.
3819     // The initialization statements below were generated automatically with a Python script.
3820     // Note: it works with stdin/stdout.
3821 #if 0
3822 import re
3823 import sys
3824 
3825 #Lines look like : tcu::Vec4 vert_f64d4_inter_0[IfaceVar::kNumVertices];
3826 lineRE = re.compile(r'^\s*(\S+)\s+(\w+)\[(\S+)\];.*$')
3827 vecRE = re.compile(r'^.*Vec(\d)$')
3828 floatSuffixes = (
3829     (0.25, 0.50, 0.875, 0.0),
3830     (0.25, 0.75, 0.875, 0.0),
3831     (0.50, 0.50, 0.875, 0.0),
3832     (0.50, 0.75, 0.875, 0.0),
3833 )
3834 lineCounter = 0
3835 
3836 for line in sys.stdin:
3837     match = lineRE.search(line)
3838     if not match:
3839         continue
3840 
3841     varType = match.group(1)
3842     varName = match.group(2)
3843     varSize = match.group(3)
3844 
3845     arraySize = (4 if varSize == 'IfaceVar::kNumVertices' else 2)
3846     vecMatch = vecRE.match(varType)
3847     numComponents = (1 if not vecMatch else vecMatch.group(1))
3848     isFlat = '_flat_' in varName
3849 
3850     lineCounter += 1
3851     varBaseVal = 1000 + 10 * lineCounter
3852     valueTemplate = ('%s' if numComponents == 1 else '%s(%%s)' % (varType,))
3853 
3854     for index in range(arraySize):
3855         valueStr = ''
3856         for comp in range(numComponents):
3857             compValue = varBaseVal + comp + 1
3858             if not isFlat:
3859                 compValue += floatSuffixes[index][comp]
3860             valueStr += ('' if comp == 0 else ', ') + str(compValue)
3861         value = valueTemplate % (valueStr,)
3862         statement = '%s[%s] = %s;' % (varName, index, value)
3863         print('%s' % (statement,))
3864 #endif
3865     InterfaceVariablesCase::PerVertexData perVertexData;
3866     {
3867         perVertexData.vert_f64d4_inter_0[0] = tcu::Vec4(1011.25, 1012.5, 1013.875, 1014.0);
3868         perVertexData.vert_f64d4_inter_0[1] = tcu::Vec4(1011.25, 1012.75, 1013.875, 1014.0);
3869         perVertexData.vert_f64d4_inter_0[2] = tcu::Vec4(1011.5, 1012.5, 1013.875, 1014.0);
3870         perVertexData.vert_f64d4_inter_0[3] = tcu::Vec4(1011.5, 1012.75, 1013.875, 1014.0);
3871         perVertexData.vert_f64d4_inter_1[0] = tcu::Vec4(1021.25, 1022.5, 1023.875, 1024.0);
3872         perVertexData.vert_f64d4_inter_1[1] = tcu::Vec4(1021.25, 1022.75, 1023.875, 1024.0);
3873         perVertexData.vert_f64d4_inter_1[2] = tcu::Vec4(1021.5, 1022.5, 1023.875, 1024.0);
3874         perVertexData.vert_f64d4_inter_1[3] = tcu::Vec4(1021.5, 1022.75, 1023.875, 1024.0);
3875         perVertexData.vert_f64d3_inter_0[0] = tcu::Vec3(1031.25, 1032.5, 1033.875);
3876         perVertexData.vert_f64d3_inter_0[1] = tcu::Vec3(1031.25, 1032.75, 1033.875);
3877         perVertexData.vert_f64d3_inter_0[2] = tcu::Vec3(1031.5, 1032.5, 1033.875);
3878         perVertexData.vert_f64d3_inter_0[3] = tcu::Vec3(1031.5, 1032.75, 1033.875);
3879         perVertexData.vert_f64d3_inter_1[0] = tcu::Vec3(1041.25, 1042.5, 1043.875);
3880         perVertexData.vert_f64d3_inter_1[1] = tcu::Vec3(1041.25, 1042.75, 1043.875);
3881         perVertexData.vert_f64d3_inter_1[2] = tcu::Vec3(1041.5, 1042.5, 1043.875);
3882         perVertexData.vert_f64d3_inter_1[3] = tcu::Vec3(1041.5, 1042.75, 1043.875);
3883         perVertexData.vert_f64d2_inter_0[0] = tcu::Vec2(1051.25, 1052.5);
3884         perVertexData.vert_f64d2_inter_0[1] = tcu::Vec2(1051.25, 1052.75);
3885         perVertexData.vert_f64d2_inter_0[2] = tcu::Vec2(1051.5, 1052.5);
3886         perVertexData.vert_f64d2_inter_0[3] = tcu::Vec2(1051.5, 1052.75);
3887         perVertexData.vert_f64d2_inter_1[0] = tcu::Vec2(1061.25, 1062.5);
3888         perVertexData.vert_f64d2_inter_1[1] = tcu::Vec2(1061.25, 1062.75);
3889         perVertexData.vert_f64d2_inter_1[2] = tcu::Vec2(1061.5, 1062.5);
3890         perVertexData.vert_f64d2_inter_1[3] = tcu::Vec2(1061.5, 1062.75);
3891         perVertexData.vert_f64d1_inter_0[0] = 1071.25;
3892         perVertexData.vert_f64d1_inter_0[1] = 1071.25;
3893         perVertexData.vert_f64d1_inter_0[2] = 1071.5;
3894         perVertexData.vert_f64d1_inter_0[3] = 1071.5;
3895         perVertexData.vert_f64d1_inter_1[0] = 1081.25;
3896         perVertexData.vert_f64d1_inter_1[1] = 1081.25;
3897         perVertexData.vert_f64d1_inter_1[2] = 1081.5;
3898         perVertexData.vert_f64d1_inter_1[3] = 1081.5;
3899         perVertexData.vert_f32d4_inter_0[0] = tcu::Vec4(1091.25, 1092.5, 1093.875, 1094.0);
3900         perVertexData.vert_f32d4_inter_0[1] = tcu::Vec4(1091.25, 1092.75, 1093.875, 1094.0);
3901         perVertexData.vert_f32d4_inter_0[2] = tcu::Vec4(1091.5, 1092.5, 1093.875, 1094.0);
3902         perVertexData.vert_f32d4_inter_0[3] = tcu::Vec4(1091.5, 1092.75, 1093.875, 1094.0);
3903         perVertexData.vert_f32d4_inter_1[0] = tcu::Vec4(1101.25, 1102.5, 1103.875, 1104.0);
3904         perVertexData.vert_f32d4_inter_1[1] = tcu::Vec4(1101.25, 1102.75, 1103.875, 1104.0);
3905         perVertexData.vert_f32d4_inter_1[2] = tcu::Vec4(1101.5, 1102.5, 1103.875, 1104.0);
3906         perVertexData.vert_f32d4_inter_1[3] = tcu::Vec4(1101.5, 1102.75, 1103.875, 1104.0);
3907         perVertexData.vert_f32d3_inter_0[0] = tcu::Vec3(1111.25, 1112.5, 1113.875);
3908         perVertexData.vert_f32d3_inter_0[1] = tcu::Vec3(1111.25, 1112.75, 1113.875);
3909         perVertexData.vert_f32d3_inter_0[2] = tcu::Vec3(1111.5, 1112.5, 1113.875);
3910         perVertexData.vert_f32d3_inter_0[3] = tcu::Vec3(1111.5, 1112.75, 1113.875);
3911         perVertexData.vert_f32d3_inter_1[0] = tcu::Vec3(1121.25, 1122.5, 1123.875);
3912         perVertexData.vert_f32d3_inter_1[1] = tcu::Vec3(1121.25, 1122.75, 1123.875);
3913         perVertexData.vert_f32d3_inter_1[2] = tcu::Vec3(1121.5, 1122.5, 1123.875);
3914         perVertexData.vert_f32d3_inter_1[3] = tcu::Vec3(1121.5, 1122.75, 1123.875);
3915         perVertexData.vert_f32d2_inter_0[0] = tcu::Vec2(1131.25, 1132.5);
3916         perVertexData.vert_f32d2_inter_0[1] = tcu::Vec2(1131.25, 1132.75);
3917         perVertexData.vert_f32d2_inter_0[2] = tcu::Vec2(1131.5, 1132.5);
3918         perVertexData.vert_f32d2_inter_0[3] = tcu::Vec2(1131.5, 1132.75);
3919         perVertexData.vert_f32d2_inter_1[0] = tcu::Vec2(1141.25, 1142.5);
3920         perVertexData.vert_f32d2_inter_1[1] = tcu::Vec2(1141.25, 1142.75);
3921         perVertexData.vert_f32d2_inter_1[2] = tcu::Vec2(1141.5, 1142.5);
3922         perVertexData.vert_f32d2_inter_1[3] = tcu::Vec2(1141.5, 1142.75);
3923         perVertexData.vert_f32d1_inter_0[0] = 1151.25;
3924         perVertexData.vert_f32d1_inter_0[1] = 1151.25;
3925         perVertexData.vert_f32d1_inter_0[2] = 1151.5;
3926         perVertexData.vert_f32d1_inter_0[3] = 1151.5;
3927         perVertexData.vert_f32d1_inter_1[0] = 1161.25;
3928         perVertexData.vert_f32d1_inter_1[1] = 1161.25;
3929         perVertexData.vert_f32d1_inter_1[2] = 1161.5;
3930         perVertexData.vert_f32d1_inter_1[3] = 1161.5;
3931         perVertexData.vert_f16d4_inter_0[0] = tcu::Vec4(1171.25, 1172.5, 1173.875, 1174.0);
3932         perVertexData.vert_f16d4_inter_0[1] = tcu::Vec4(1171.25, 1172.75, 1173.875, 1174.0);
3933         perVertexData.vert_f16d4_inter_0[2] = tcu::Vec4(1171.5, 1172.5, 1173.875, 1174.0);
3934         perVertexData.vert_f16d4_inter_0[3] = tcu::Vec4(1171.5, 1172.75, 1173.875, 1174.0);
3935         perVertexData.vert_f16d4_inter_1[0] = tcu::Vec4(1181.25, 1182.5, 1183.875, 1184.0);
3936         perVertexData.vert_f16d4_inter_1[1] = tcu::Vec4(1181.25, 1182.75, 1183.875, 1184.0);
3937         perVertexData.vert_f16d4_inter_1[2] = tcu::Vec4(1181.5, 1182.5, 1183.875, 1184.0);
3938         perVertexData.vert_f16d4_inter_1[3] = tcu::Vec4(1181.5, 1182.75, 1183.875, 1184.0);
3939         perVertexData.vert_f16d3_inter_0[0] = tcu::Vec3(1191.25, 1192.5, 1193.875);
3940         perVertexData.vert_f16d3_inter_0[1] = tcu::Vec3(1191.25, 1192.75, 1193.875);
3941         perVertexData.vert_f16d3_inter_0[2] = tcu::Vec3(1191.5, 1192.5, 1193.875);
3942         perVertexData.vert_f16d3_inter_0[3] = tcu::Vec3(1191.5, 1192.75, 1193.875);
3943         perVertexData.vert_f16d3_inter_1[0] = tcu::Vec3(1201.25, 1202.5, 1203.875);
3944         perVertexData.vert_f16d3_inter_1[1] = tcu::Vec3(1201.25, 1202.75, 1203.875);
3945         perVertexData.vert_f16d3_inter_1[2] = tcu::Vec3(1201.5, 1202.5, 1203.875);
3946         perVertexData.vert_f16d3_inter_1[3] = tcu::Vec3(1201.5, 1202.75, 1203.875);
3947         perVertexData.vert_f16d2_inter_0[0] = tcu::Vec2(1211.25, 1212.5);
3948         perVertexData.vert_f16d2_inter_0[1] = tcu::Vec2(1211.25, 1212.75);
3949         perVertexData.vert_f16d2_inter_0[2] = tcu::Vec2(1211.5, 1212.5);
3950         perVertexData.vert_f16d2_inter_0[3] = tcu::Vec2(1211.5, 1212.75);
3951         perVertexData.vert_f16d2_inter_1[0] = tcu::Vec2(1221.25, 1222.5);
3952         perVertexData.vert_f16d2_inter_1[1] = tcu::Vec2(1221.25, 1222.75);
3953         perVertexData.vert_f16d2_inter_1[2] = tcu::Vec2(1221.5, 1222.5);
3954         perVertexData.vert_f16d2_inter_1[3] = tcu::Vec2(1221.5, 1222.75);
3955         perVertexData.vert_f16d1_inter_0[0] = 1231.25;
3956         perVertexData.vert_f16d1_inter_0[1] = 1231.25;
3957         perVertexData.vert_f16d1_inter_0[2] = 1231.5;
3958         perVertexData.vert_f16d1_inter_0[3] = 1231.5;
3959         perVertexData.vert_f16d1_inter_1[0] = 1241.25;
3960         perVertexData.vert_f16d1_inter_1[1] = 1241.25;
3961         perVertexData.vert_f16d1_inter_1[2] = 1241.5;
3962         perVertexData.vert_f16d1_inter_1[3] = 1241.5;
3963         perVertexData.vert_f64d4_flat_0[0]  = tcu::Vec4(1251, 1252, 1253, 1254);
3964         perVertexData.vert_f64d4_flat_0[1]  = tcu::Vec4(1251, 1252, 1253, 1254);
3965         perVertexData.vert_f64d4_flat_0[2]  = tcu::Vec4(1251, 1252, 1253, 1254);
3966         perVertexData.vert_f64d4_flat_0[3]  = tcu::Vec4(1251, 1252, 1253, 1254);
3967         perVertexData.vert_f64d4_flat_1[0]  = tcu::Vec4(1261, 1262, 1263, 1264);
3968         perVertexData.vert_f64d4_flat_1[1]  = tcu::Vec4(1261, 1262, 1263, 1264);
3969         perVertexData.vert_f64d4_flat_1[2]  = tcu::Vec4(1261, 1262, 1263, 1264);
3970         perVertexData.vert_f64d4_flat_1[3]  = tcu::Vec4(1261, 1262, 1263, 1264);
3971         perVertexData.vert_f64d3_flat_0[0]  = tcu::Vec3(1271, 1272, 1273);
3972         perVertexData.vert_f64d3_flat_0[1]  = tcu::Vec3(1271, 1272, 1273);
3973         perVertexData.vert_f64d3_flat_0[2]  = tcu::Vec3(1271, 1272, 1273);
3974         perVertexData.vert_f64d3_flat_0[3]  = tcu::Vec3(1271, 1272, 1273);
3975         perVertexData.vert_f64d3_flat_1[0]  = tcu::Vec3(1281, 1282, 1283);
3976         perVertexData.vert_f64d3_flat_1[1]  = tcu::Vec3(1281, 1282, 1283);
3977         perVertexData.vert_f64d3_flat_1[2]  = tcu::Vec3(1281, 1282, 1283);
3978         perVertexData.vert_f64d3_flat_1[3]  = tcu::Vec3(1281, 1282, 1283);
3979         perVertexData.vert_f64d2_flat_0[0]  = tcu::Vec2(1291, 1292);
3980         perVertexData.vert_f64d2_flat_0[1]  = tcu::Vec2(1291, 1292);
3981         perVertexData.vert_f64d2_flat_0[2]  = tcu::Vec2(1291, 1292);
3982         perVertexData.vert_f64d2_flat_0[3]  = tcu::Vec2(1291, 1292);
3983         perVertexData.vert_f64d2_flat_1[0]  = tcu::Vec2(1301, 1302);
3984         perVertexData.vert_f64d2_flat_1[1]  = tcu::Vec2(1301, 1302);
3985         perVertexData.vert_f64d2_flat_1[2]  = tcu::Vec2(1301, 1302);
3986         perVertexData.vert_f64d2_flat_1[3]  = tcu::Vec2(1301, 1302);
3987         perVertexData.vert_f64d1_flat_0[0]  = 1311;
3988         perVertexData.vert_f64d1_flat_0[1]  = 1311;
3989         perVertexData.vert_f64d1_flat_0[2]  = 1311;
3990         perVertexData.vert_f64d1_flat_0[3]  = 1311;
3991         perVertexData.vert_f64d1_flat_1[0]  = 1321;
3992         perVertexData.vert_f64d1_flat_1[1]  = 1321;
3993         perVertexData.vert_f64d1_flat_1[2]  = 1321;
3994         perVertexData.vert_f64d1_flat_1[3]  = 1321;
3995         perVertexData.vert_f32d4_flat_0[0]  = tcu::Vec4(1331, 1332, 1333, 1334);
3996         perVertexData.vert_f32d4_flat_0[1]  = tcu::Vec4(1331, 1332, 1333, 1334);
3997         perVertexData.vert_f32d4_flat_0[2]  = tcu::Vec4(1331, 1332, 1333, 1334);
3998         perVertexData.vert_f32d4_flat_0[3]  = tcu::Vec4(1331, 1332, 1333, 1334);
3999         perVertexData.vert_f32d4_flat_1[0]  = tcu::Vec4(1341, 1342, 1343, 1344);
4000         perVertexData.vert_f32d4_flat_1[1]  = tcu::Vec4(1341, 1342, 1343, 1344);
4001         perVertexData.vert_f32d4_flat_1[2]  = tcu::Vec4(1341, 1342, 1343, 1344);
4002         perVertexData.vert_f32d4_flat_1[3]  = tcu::Vec4(1341, 1342, 1343, 1344);
4003         perVertexData.vert_f32d3_flat_0[0]  = tcu::Vec3(1351, 1352, 1353);
4004         perVertexData.vert_f32d3_flat_0[1]  = tcu::Vec3(1351, 1352, 1353);
4005         perVertexData.vert_f32d3_flat_0[2]  = tcu::Vec3(1351, 1352, 1353);
4006         perVertexData.vert_f32d3_flat_0[3]  = tcu::Vec3(1351, 1352, 1353);
4007         perVertexData.vert_f32d3_flat_1[0]  = tcu::Vec3(1361, 1362, 1363);
4008         perVertexData.vert_f32d3_flat_1[1]  = tcu::Vec3(1361, 1362, 1363);
4009         perVertexData.vert_f32d3_flat_1[2]  = tcu::Vec3(1361, 1362, 1363);
4010         perVertexData.vert_f32d3_flat_1[3]  = tcu::Vec3(1361, 1362, 1363);
4011         perVertexData.vert_f32d2_flat_0[0]  = tcu::Vec2(1371, 1372);
4012         perVertexData.vert_f32d2_flat_0[1]  = tcu::Vec2(1371, 1372);
4013         perVertexData.vert_f32d2_flat_0[2]  = tcu::Vec2(1371, 1372);
4014         perVertexData.vert_f32d2_flat_0[3]  = tcu::Vec2(1371, 1372);
4015         perVertexData.vert_f32d2_flat_1[0]  = tcu::Vec2(1381, 1382);
4016         perVertexData.vert_f32d2_flat_1[1]  = tcu::Vec2(1381, 1382);
4017         perVertexData.vert_f32d2_flat_1[2]  = tcu::Vec2(1381, 1382);
4018         perVertexData.vert_f32d2_flat_1[3]  = tcu::Vec2(1381, 1382);
4019         perVertexData.vert_f32d1_flat_0[0]  = 1391;
4020         perVertexData.vert_f32d1_flat_0[1]  = 1391;
4021         perVertexData.vert_f32d1_flat_0[2]  = 1391;
4022         perVertexData.vert_f32d1_flat_0[3]  = 1391;
4023         perVertexData.vert_f32d1_flat_1[0]  = 1401;
4024         perVertexData.vert_f32d1_flat_1[1]  = 1401;
4025         perVertexData.vert_f32d1_flat_1[2]  = 1401;
4026         perVertexData.vert_f32d1_flat_1[3]  = 1401;
4027         perVertexData.vert_f16d4_flat_0[0]  = tcu::Vec4(1411, 1412, 1413, 1414);
4028         perVertexData.vert_f16d4_flat_0[1]  = tcu::Vec4(1411, 1412, 1413, 1414);
4029         perVertexData.vert_f16d4_flat_0[2]  = tcu::Vec4(1411, 1412, 1413, 1414);
4030         perVertexData.vert_f16d4_flat_0[3]  = tcu::Vec4(1411, 1412, 1413, 1414);
4031         perVertexData.vert_f16d4_flat_1[0]  = tcu::Vec4(1421, 1422, 1423, 1424);
4032         perVertexData.vert_f16d4_flat_1[1]  = tcu::Vec4(1421, 1422, 1423, 1424);
4033         perVertexData.vert_f16d4_flat_1[2]  = tcu::Vec4(1421, 1422, 1423, 1424);
4034         perVertexData.vert_f16d4_flat_1[3]  = tcu::Vec4(1421, 1422, 1423, 1424);
4035         perVertexData.vert_f16d3_flat_0[0]  = tcu::Vec3(1431, 1432, 1433);
4036         perVertexData.vert_f16d3_flat_0[1]  = tcu::Vec3(1431, 1432, 1433);
4037         perVertexData.vert_f16d3_flat_0[2]  = tcu::Vec3(1431, 1432, 1433);
4038         perVertexData.vert_f16d3_flat_0[3]  = tcu::Vec3(1431, 1432, 1433);
4039         perVertexData.vert_f16d3_flat_1[0]  = tcu::Vec3(1441, 1442, 1443);
4040         perVertexData.vert_f16d3_flat_1[1]  = tcu::Vec3(1441, 1442, 1443);
4041         perVertexData.vert_f16d3_flat_1[2]  = tcu::Vec3(1441, 1442, 1443);
4042         perVertexData.vert_f16d3_flat_1[3]  = tcu::Vec3(1441, 1442, 1443);
4043         perVertexData.vert_f16d2_flat_0[0]  = tcu::Vec2(1451, 1452);
4044         perVertexData.vert_f16d2_flat_0[1]  = tcu::Vec2(1451, 1452);
4045         perVertexData.vert_f16d2_flat_0[2]  = tcu::Vec2(1451, 1452);
4046         perVertexData.vert_f16d2_flat_0[3]  = tcu::Vec2(1451, 1452);
4047         perVertexData.vert_f16d2_flat_1[0]  = tcu::Vec2(1461, 1462);
4048         perVertexData.vert_f16d2_flat_1[1]  = tcu::Vec2(1461, 1462);
4049         perVertexData.vert_f16d2_flat_1[2]  = tcu::Vec2(1461, 1462);
4050         perVertexData.vert_f16d2_flat_1[3]  = tcu::Vec2(1461, 1462);
4051         perVertexData.vert_f16d1_flat_0[0]  = 1471;
4052         perVertexData.vert_f16d1_flat_0[1]  = 1471;
4053         perVertexData.vert_f16d1_flat_0[2]  = 1471;
4054         perVertexData.vert_f16d1_flat_0[3]  = 1471;
4055         perVertexData.vert_f16d1_flat_1[0]  = 1481;
4056         perVertexData.vert_f16d1_flat_1[1]  = 1481;
4057         perVertexData.vert_f16d1_flat_1[2]  = 1481;
4058         perVertexData.vert_f16d1_flat_1[3]  = 1481;
4059         perVertexData.vert_i64d4_flat_0[0]  = tcu::IVec4(1491, 1492, 1493, 1494);
4060         perVertexData.vert_i64d4_flat_0[1]  = tcu::IVec4(1491, 1492, 1493, 1494);
4061         perVertexData.vert_i64d4_flat_0[2]  = tcu::IVec4(1491, 1492, 1493, 1494);
4062         perVertexData.vert_i64d4_flat_0[3]  = tcu::IVec4(1491, 1492, 1493, 1494);
4063         perVertexData.vert_i64d4_flat_1[0]  = tcu::IVec4(1501, 1502, 1503, 1504);
4064         perVertexData.vert_i64d4_flat_1[1]  = tcu::IVec4(1501, 1502, 1503, 1504);
4065         perVertexData.vert_i64d4_flat_1[2]  = tcu::IVec4(1501, 1502, 1503, 1504);
4066         perVertexData.vert_i64d4_flat_1[3]  = tcu::IVec4(1501, 1502, 1503, 1504);
4067         perVertexData.vert_i64d3_flat_0[0]  = tcu::IVec3(1511, 1512, 1513);
4068         perVertexData.vert_i64d3_flat_0[1]  = tcu::IVec3(1511, 1512, 1513);
4069         perVertexData.vert_i64d3_flat_0[2]  = tcu::IVec3(1511, 1512, 1513);
4070         perVertexData.vert_i64d3_flat_0[3]  = tcu::IVec3(1511, 1512, 1513);
4071         perVertexData.vert_i64d3_flat_1[0]  = tcu::IVec3(1521, 1522, 1523);
4072         perVertexData.vert_i64d3_flat_1[1]  = tcu::IVec3(1521, 1522, 1523);
4073         perVertexData.vert_i64d3_flat_1[2]  = tcu::IVec3(1521, 1522, 1523);
4074         perVertexData.vert_i64d3_flat_1[3]  = tcu::IVec3(1521, 1522, 1523);
4075         perVertexData.vert_i64d2_flat_0[0]  = tcu::IVec2(1531, 1532);
4076         perVertexData.vert_i64d2_flat_0[1]  = tcu::IVec2(1531, 1532);
4077         perVertexData.vert_i64d2_flat_0[2]  = tcu::IVec2(1531, 1532);
4078         perVertexData.vert_i64d2_flat_0[3]  = tcu::IVec2(1531, 1532);
4079         perVertexData.vert_i64d2_flat_1[0]  = tcu::IVec2(1541, 1542);
4080         perVertexData.vert_i64d2_flat_1[1]  = tcu::IVec2(1541, 1542);
4081         perVertexData.vert_i64d2_flat_1[2]  = tcu::IVec2(1541, 1542);
4082         perVertexData.vert_i64d2_flat_1[3]  = tcu::IVec2(1541, 1542);
4083         perVertexData.vert_i64d1_flat_0[0]  = 1551;
4084         perVertexData.vert_i64d1_flat_0[1]  = 1551;
4085         perVertexData.vert_i64d1_flat_0[2]  = 1551;
4086         perVertexData.vert_i64d1_flat_0[3]  = 1551;
4087         perVertexData.vert_i64d1_flat_1[0]  = 1561;
4088         perVertexData.vert_i64d1_flat_1[1]  = 1561;
4089         perVertexData.vert_i64d1_flat_1[2]  = 1561;
4090         perVertexData.vert_i64d1_flat_1[3]  = 1561;
4091         perVertexData.vert_i32d4_flat_0[0]  = tcu::IVec4(1571, 1572, 1573, 1574);
4092         perVertexData.vert_i32d4_flat_0[1]  = tcu::IVec4(1571, 1572, 1573, 1574);
4093         perVertexData.vert_i32d4_flat_0[2]  = tcu::IVec4(1571, 1572, 1573, 1574);
4094         perVertexData.vert_i32d4_flat_0[3]  = tcu::IVec4(1571, 1572, 1573, 1574);
4095         perVertexData.vert_i32d4_flat_1[0]  = tcu::IVec4(1581, 1582, 1583, 1584);
4096         perVertexData.vert_i32d4_flat_1[1]  = tcu::IVec4(1581, 1582, 1583, 1584);
4097         perVertexData.vert_i32d4_flat_1[2]  = tcu::IVec4(1581, 1582, 1583, 1584);
4098         perVertexData.vert_i32d4_flat_1[3]  = tcu::IVec4(1581, 1582, 1583, 1584);
4099         perVertexData.vert_i32d3_flat_0[0]  = tcu::IVec3(1591, 1592, 1593);
4100         perVertexData.vert_i32d3_flat_0[1]  = tcu::IVec3(1591, 1592, 1593);
4101         perVertexData.vert_i32d3_flat_0[2]  = tcu::IVec3(1591, 1592, 1593);
4102         perVertexData.vert_i32d3_flat_0[3]  = tcu::IVec3(1591, 1592, 1593);
4103         perVertexData.vert_i32d3_flat_1[0]  = tcu::IVec3(1601, 1602, 1603);
4104         perVertexData.vert_i32d3_flat_1[1]  = tcu::IVec3(1601, 1602, 1603);
4105         perVertexData.vert_i32d3_flat_1[2]  = tcu::IVec3(1601, 1602, 1603);
4106         perVertexData.vert_i32d3_flat_1[3]  = tcu::IVec3(1601, 1602, 1603);
4107         perVertexData.vert_i32d2_flat_0[0]  = tcu::IVec2(1611, 1612);
4108         perVertexData.vert_i32d2_flat_0[1]  = tcu::IVec2(1611, 1612);
4109         perVertexData.vert_i32d2_flat_0[2]  = tcu::IVec2(1611, 1612);
4110         perVertexData.vert_i32d2_flat_0[3]  = tcu::IVec2(1611, 1612);
4111         perVertexData.vert_i32d2_flat_1[0]  = tcu::IVec2(1621, 1622);
4112         perVertexData.vert_i32d2_flat_1[1]  = tcu::IVec2(1621, 1622);
4113         perVertexData.vert_i32d2_flat_1[2]  = tcu::IVec2(1621, 1622);
4114         perVertexData.vert_i32d2_flat_1[3]  = tcu::IVec2(1621, 1622);
4115         perVertexData.vert_i32d1_flat_0[0]  = 1631;
4116         perVertexData.vert_i32d1_flat_0[1]  = 1631;
4117         perVertexData.vert_i32d1_flat_0[2]  = 1631;
4118         perVertexData.vert_i32d1_flat_0[3]  = 1631;
4119         perVertexData.vert_i32d1_flat_1[0]  = 1641;
4120         perVertexData.vert_i32d1_flat_1[1]  = 1641;
4121         perVertexData.vert_i32d1_flat_1[2]  = 1641;
4122         perVertexData.vert_i32d1_flat_1[3]  = 1641;
4123         perVertexData.vert_i16d4_flat_0[0]  = tcu::IVec4(1651, 1652, 1653, 1654);
4124         perVertexData.vert_i16d4_flat_0[1]  = tcu::IVec4(1651, 1652, 1653, 1654);
4125         perVertexData.vert_i16d4_flat_0[2]  = tcu::IVec4(1651, 1652, 1653, 1654);
4126         perVertexData.vert_i16d4_flat_0[3]  = tcu::IVec4(1651, 1652, 1653, 1654);
4127         perVertexData.vert_i16d4_flat_1[0]  = tcu::IVec4(1661, 1662, 1663, 1664);
4128         perVertexData.vert_i16d4_flat_1[1]  = tcu::IVec4(1661, 1662, 1663, 1664);
4129         perVertexData.vert_i16d4_flat_1[2]  = tcu::IVec4(1661, 1662, 1663, 1664);
4130         perVertexData.vert_i16d4_flat_1[3]  = tcu::IVec4(1661, 1662, 1663, 1664);
4131         perVertexData.vert_i16d3_flat_0[0]  = tcu::IVec3(1671, 1672, 1673);
4132         perVertexData.vert_i16d3_flat_0[1]  = tcu::IVec3(1671, 1672, 1673);
4133         perVertexData.vert_i16d3_flat_0[2]  = tcu::IVec3(1671, 1672, 1673);
4134         perVertexData.vert_i16d3_flat_0[3]  = tcu::IVec3(1671, 1672, 1673);
4135         perVertexData.vert_i16d3_flat_1[0]  = tcu::IVec3(1681, 1682, 1683);
4136         perVertexData.vert_i16d3_flat_1[1]  = tcu::IVec3(1681, 1682, 1683);
4137         perVertexData.vert_i16d3_flat_1[2]  = tcu::IVec3(1681, 1682, 1683);
4138         perVertexData.vert_i16d3_flat_1[3]  = tcu::IVec3(1681, 1682, 1683);
4139         perVertexData.vert_i16d2_flat_0[0]  = tcu::IVec2(1691, 1692);
4140         perVertexData.vert_i16d2_flat_0[1]  = tcu::IVec2(1691, 1692);
4141         perVertexData.vert_i16d2_flat_0[2]  = tcu::IVec2(1691, 1692);
4142         perVertexData.vert_i16d2_flat_0[3]  = tcu::IVec2(1691, 1692);
4143         perVertexData.vert_i16d2_flat_1[0]  = tcu::IVec2(1701, 1702);
4144         perVertexData.vert_i16d2_flat_1[1]  = tcu::IVec2(1701, 1702);
4145         perVertexData.vert_i16d2_flat_1[2]  = tcu::IVec2(1701, 1702);
4146         perVertexData.vert_i16d2_flat_1[3]  = tcu::IVec2(1701, 1702);
4147         perVertexData.vert_i16d1_flat_0[0]  = 1711;
4148         perVertexData.vert_i16d1_flat_0[1]  = 1711;
4149         perVertexData.vert_i16d1_flat_0[2]  = 1711;
4150         perVertexData.vert_i16d1_flat_0[3]  = 1711;
4151         perVertexData.vert_i16d1_flat_1[0]  = 1721;
4152         perVertexData.vert_i16d1_flat_1[1]  = 1721;
4153         perVertexData.vert_i16d1_flat_1[2]  = 1721;
4154         perVertexData.vert_i16d1_flat_1[3]  = 1721;
4155     }
4156 
4157     InterfaceVariablesCase::PerPrimitiveData perPrimitiveData;
4158     {
4159         perPrimitiveData.prim_f64d4_flat_0[0] = tcu::Vec4(1011, 1012, 1013, 1014);
4160         perPrimitiveData.prim_f64d4_flat_0[1] = tcu::Vec4(1011, 1012, 1013, 1014);
4161         perPrimitiveData.prim_f64d4_flat_1[0] = tcu::Vec4(1021, 1022, 1023, 1024);
4162         perPrimitiveData.prim_f64d4_flat_1[1] = tcu::Vec4(1021, 1022, 1023, 1024);
4163         perPrimitiveData.prim_f64d3_flat_0[0] = tcu::Vec3(1031, 1032, 1033);
4164         perPrimitiveData.prim_f64d3_flat_0[1] = tcu::Vec3(1031, 1032, 1033);
4165         perPrimitiveData.prim_f64d3_flat_1[0] = tcu::Vec3(1041, 1042, 1043);
4166         perPrimitiveData.prim_f64d3_flat_1[1] = tcu::Vec3(1041, 1042, 1043);
4167         perPrimitiveData.prim_f64d2_flat_0[0] = tcu::Vec2(1051, 1052);
4168         perPrimitiveData.prim_f64d2_flat_0[1] = tcu::Vec2(1051, 1052);
4169         perPrimitiveData.prim_f64d2_flat_1[0] = tcu::Vec2(1061, 1062);
4170         perPrimitiveData.prim_f64d2_flat_1[1] = tcu::Vec2(1061, 1062);
4171         perPrimitiveData.prim_f64d1_flat_0[0] = 1071;
4172         perPrimitiveData.prim_f64d1_flat_0[1] = 1071;
4173         perPrimitiveData.prim_f64d1_flat_1[0] = 1081;
4174         perPrimitiveData.prim_f64d1_flat_1[1] = 1081;
4175         perPrimitiveData.prim_f32d4_flat_0[0] = tcu::Vec4(1091, 1092, 1093, 1094);
4176         perPrimitiveData.prim_f32d4_flat_0[1] = tcu::Vec4(1091, 1092, 1093, 1094);
4177         perPrimitiveData.prim_f32d4_flat_1[0] = tcu::Vec4(1101, 1102, 1103, 1104);
4178         perPrimitiveData.prim_f32d4_flat_1[1] = tcu::Vec4(1101, 1102, 1103, 1104);
4179         perPrimitiveData.prim_f32d3_flat_0[0] = tcu::Vec3(1111, 1112, 1113);
4180         perPrimitiveData.prim_f32d3_flat_0[1] = tcu::Vec3(1111, 1112, 1113);
4181         perPrimitiveData.prim_f32d3_flat_1[0] = tcu::Vec3(1121, 1122, 1123);
4182         perPrimitiveData.prim_f32d3_flat_1[1] = tcu::Vec3(1121, 1122, 1123);
4183         perPrimitiveData.prim_f32d2_flat_0[0] = tcu::Vec2(1131, 1132);
4184         perPrimitiveData.prim_f32d2_flat_0[1] = tcu::Vec2(1131, 1132);
4185         perPrimitiveData.prim_f32d2_flat_1[0] = tcu::Vec2(1141, 1142);
4186         perPrimitiveData.prim_f32d2_flat_1[1] = tcu::Vec2(1141, 1142);
4187         perPrimitiveData.prim_f32d1_flat_0[0] = 1151;
4188         perPrimitiveData.prim_f32d1_flat_0[1] = 1151;
4189         perPrimitiveData.prim_f32d1_flat_1[0] = 1161;
4190         perPrimitiveData.prim_f32d1_flat_1[1] = 1161;
4191         perPrimitiveData.prim_f16d4_flat_0[0] = tcu::Vec4(1171, 1172, 1173, 1174);
4192         perPrimitiveData.prim_f16d4_flat_0[1] = tcu::Vec4(1171, 1172, 1173, 1174);
4193         perPrimitiveData.prim_f16d4_flat_1[0] = tcu::Vec4(1181, 1182, 1183, 1184);
4194         perPrimitiveData.prim_f16d4_flat_1[1] = tcu::Vec4(1181, 1182, 1183, 1184);
4195         perPrimitiveData.prim_f16d3_flat_0[0] = tcu::Vec3(1191, 1192, 1193);
4196         perPrimitiveData.prim_f16d3_flat_0[1] = tcu::Vec3(1191, 1192, 1193);
4197         perPrimitiveData.prim_f16d3_flat_1[0] = tcu::Vec3(1201, 1202, 1203);
4198         perPrimitiveData.prim_f16d3_flat_1[1] = tcu::Vec3(1201, 1202, 1203);
4199         perPrimitiveData.prim_f16d2_flat_0[0] = tcu::Vec2(1211, 1212);
4200         perPrimitiveData.prim_f16d2_flat_0[1] = tcu::Vec2(1211, 1212);
4201         perPrimitiveData.prim_f16d2_flat_1[0] = tcu::Vec2(1221, 1222);
4202         perPrimitiveData.prim_f16d2_flat_1[1] = tcu::Vec2(1221, 1222);
4203         perPrimitiveData.prim_f16d1_flat_0[0] = 1231;
4204         perPrimitiveData.prim_f16d1_flat_0[1] = 1231;
4205         perPrimitiveData.prim_f16d1_flat_1[0] = 1241;
4206         perPrimitiveData.prim_f16d1_flat_1[1] = 1241;
4207         perPrimitiveData.prim_i64d4_flat_0[0] = tcu::IVec4(1251, 1252, 1253, 1254);
4208         perPrimitiveData.prim_i64d4_flat_0[1] = tcu::IVec4(1251, 1252, 1253, 1254);
4209         perPrimitiveData.prim_i64d4_flat_1[0] = tcu::IVec4(1261, 1262, 1263, 1264);
4210         perPrimitiveData.prim_i64d4_flat_1[1] = tcu::IVec4(1261, 1262, 1263, 1264);
4211         perPrimitiveData.prim_i64d3_flat_0[0] = tcu::IVec3(1271, 1272, 1273);
4212         perPrimitiveData.prim_i64d3_flat_0[1] = tcu::IVec3(1271, 1272, 1273);
4213         perPrimitiveData.prim_i64d3_flat_1[0] = tcu::IVec3(1281, 1282, 1283);
4214         perPrimitiveData.prim_i64d3_flat_1[1] = tcu::IVec3(1281, 1282, 1283);
4215         perPrimitiveData.prim_i64d2_flat_0[0] = tcu::IVec2(1291, 1292);
4216         perPrimitiveData.prim_i64d2_flat_0[1] = tcu::IVec2(1291, 1292);
4217         perPrimitiveData.prim_i64d2_flat_1[0] = tcu::IVec2(1301, 1302);
4218         perPrimitiveData.prim_i64d2_flat_1[1] = tcu::IVec2(1301, 1302);
4219         perPrimitiveData.prim_i64d1_flat_0[0] = 1311;
4220         perPrimitiveData.prim_i64d1_flat_0[1] = 1311;
4221         perPrimitiveData.prim_i64d1_flat_1[0] = 1321;
4222         perPrimitiveData.prim_i64d1_flat_1[1] = 1321;
4223         perPrimitiveData.prim_i32d4_flat_0[0] = tcu::IVec4(1331, 1332, 1333, 1334);
4224         perPrimitiveData.prim_i32d4_flat_0[1] = tcu::IVec4(1331, 1332, 1333, 1334);
4225         perPrimitiveData.prim_i32d4_flat_1[0] = tcu::IVec4(1341, 1342, 1343, 1344);
4226         perPrimitiveData.prim_i32d4_flat_1[1] = tcu::IVec4(1341, 1342, 1343, 1344);
4227         perPrimitiveData.prim_i32d3_flat_0[0] = tcu::IVec3(1351, 1352, 1353);
4228         perPrimitiveData.prim_i32d3_flat_0[1] = tcu::IVec3(1351, 1352, 1353);
4229         perPrimitiveData.prim_i32d3_flat_1[0] = tcu::IVec3(1361, 1362, 1363);
4230         perPrimitiveData.prim_i32d3_flat_1[1] = tcu::IVec3(1361, 1362, 1363);
4231         perPrimitiveData.prim_i32d2_flat_0[0] = tcu::IVec2(1371, 1372);
4232         perPrimitiveData.prim_i32d2_flat_0[1] = tcu::IVec2(1371, 1372);
4233         perPrimitiveData.prim_i32d2_flat_1[0] = tcu::IVec2(1381, 1382);
4234         perPrimitiveData.prim_i32d2_flat_1[1] = tcu::IVec2(1381, 1382);
4235         perPrimitiveData.prim_i32d1_flat_0[0] = 1391;
4236         perPrimitiveData.prim_i32d1_flat_0[1] = 1391;
4237         perPrimitiveData.prim_i32d1_flat_1[0] = 1401;
4238         perPrimitiveData.prim_i32d1_flat_1[1] = 1401;
4239         perPrimitiveData.prim_i16d4_flat_0[0] = tcu::IVec4(1411, 1412, 1413, 1414);
4240         perPrimitiveData.prim_i16d4_flat_0[1] = tcu::IVec4(1411, 1412, 1413, 1414);
4241         perPrimitiveData.prim_i16d4_flat_1[0] = tcu::IVec4(1421, 1422, 1423, 1424);
4242         perPrimitiveData.prim_i16d4_flat_1[1] = tcu::IVec4(1421, 1422, 1423, 1424);
4243         perPrimitiveData.prim_i16d3_flat_0[0] = tcu::IVec3(1431, 1432, 1433);
4244         perPrimitiveData.prim_i16d3_flat_0[1] = tcu::IVec3(1431, 1432, 1433);
4245         perPrimitiveData.prim_i16d3_flat_1[0] = tcu::IVec3(1441, 1442, 1443);
4246         perPrimitiveData.prim_i16d3_flat_1[1] = tcu::IVec3(1441, 1442, 1443);
4247         perPrimitiveData.prim_i16d2_flat_0[0] = tcu::IVec2(1451, 1452);
4248         perPrimitiveData.prim_i16d2_flat_0[1] = tcu::IVec2(1451, 1452);
4249         perPrimitiveData.prim_i16d2_flat_1[0] = tcu::IVec2(1461, 1462);
4250         perPrimitiveData.prim_i16d2_flat_1[1] = tcu::IVec2(1461, 1462);
4251         perPrimitiveData.prim_i16d1_flat_0[0] = 1471;
4252         perPrimitiveData.prim_i16d1_flat_0[1] = 1471;
4253         perPrimitiveData.prim_i16d1_flat_1[0] = 1481;
4254         perPrimitiveData.prim_i16d1_flat_1[1] = 1481;
4255     }
4256 
4257     // Create and fill buffers with this data.
4258     const auto pvdSize = static_cast<VkDeviceSize>(sizeof(perVertexData));
4259     const auto pvdInfo = makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4260     BufferWithMemory pvdData(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
4261     auto &pvdAlloc = pvdData.getAllocation();
4262     void *pvdPtr   = pvdAlloc.getHostPtr();
4263 
4264     const auto ppdSize = static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
4265     const auto ppdInfo = makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4266     BufferWithMemory ppdData(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
4267     auto &ppdAlloc = ppdData.getAllocation();
4268     void *ppdPtr   = ppdAlloc.getHostPtr();
4269 
4270     deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
4271     deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
4272 
4273     flushAlloc(vkd, device, pvdAlloc);
4274     flushAlloc(vkd, device, ppdAlloc);
4275 
4276     // Descriptor set layout.
4277     DescriptorSetLayoutBuilder setLayoutBuilder;
4278     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
4279     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
4280     const auto setLayout = setLayoutBuilder.build(vkd, device);
4281 
4282     // Create and update descriptor set.
4283     DescriptorPoolBuilder descriptorPoolBuilder;
4284     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u);
4285     const auto descriptorPool =
4286         descriptorPoolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4287     const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
4288 
4289     DescriptorSetUpdateBuilder updateBuilder;
4290     const auto pvdBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
4291     const auto ppdBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
4292     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
4293                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &pvdBufferInfo);
4294     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
4295                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &ppdBufferInfo);
4296     updateBuilder.update(vkd, device);
4297 
4298     // Pipeline layout.
4299     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
4300 
4301     // Shader modules.
4302     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
4303     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
4304 
4305     Move<VkShaderModule> taskShader;
4306     if (hasTask)
4307         taskShader = createShaderModule(vkd, device, binaries.get("task"));
4308 
4309     // Render pass.
4310     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
4311 
4312     // Framebuffer.
4313     const auto framebuffer =
4314         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
4315 
4316     // Viewport and scissor.
4317     const auto topHalf = makeViewport(imageExtent.width, imageExtent.height / 2u);
4318     const std::vector<VkViewport> viewports{makeViewport(imageExtent), topHalf};
4319     const std::vector<VkRect2D> scissors(2u, makeRect2D(imageExtent));
4320 
4321     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
4322                                                fragShader.get(), renderPass.get(), viewports, scissors);
4323 
4324     // Command pool and buffer.
4325     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
4326     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4327     const auto cmdBuffer    = cmdBufferPtr.get();
4328 
4329     beginCommandBuffer(vkd, cmdBuffer);
4330 
4331     // Run pipeline.
4332     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
4333     const auto drawCount = m_params->drawCount();
4334     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
4335     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4336     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
4337                               &descriptorSet.get(), 0u, nullptr);
4338     vkd.cmdDrawMeshTasksNV(cmdBuffer, drawCount, 0u);
4339     endRenderPass(vkd, cmdBuffer);
4340 
4341     // Copy color buffer to verification buffer.
4342     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
4343     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
4344     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
4345     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
4346 
4347     const auto preCopyBarrier =
4348         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4349                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
4350     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
4351     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
4352 
4353     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
4354                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4355     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4356                              verificationBuffer.get(), 1u, &copyRegion);
4357     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
4358                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4359 
4360     endCommandBuffer(vkd, cmdBuffer);
4361     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4362 
4363     // Generate reference image and compare results.
4364     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
4365     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
4366 
4367     generateReferenceLevel();
4368     invalidateAlloc(vkd, device, verificationBufferAlloc);
4369     if (!verifyResult(verificationAccess))
4370         TCU_FAIL("Result does not match reference; check log for details");
4371 
4372     return tcu::TestStatus::pass("Pass");
4373 }
4374 
checkMeshSupport(Context & context)4375 void checkMeshSupport(Context &context)
4376 {
4377     checkTaskMeshShaderSupportNV(context, false, true);
4378 }
4379 
initMixedPipelinesPrograms(vk::SourceCollections & programCollection)4380 void initMixedPipelinesPrograms(vk::SourceCollections &programCollection)
4381 {
4382     std::ostringstream frag;
4383     frag << "#version 450\n"
4384          << "\n"
4385          << "layout (location=0) in  vec4 inColor;\n"
4386          << "layout (location=0) out vec4 outColor;\n"
4387          << "\n"
4388          << "void main ()\n"
4389          << "{\n"
4390          << "    outColor = inColor;\n"
4391          << "}\n";
4392     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
4393 
4394     const std::string pushConstantDecl = "layout (push_constant, std430) uniform PushConstantBlock {\n"
4395                                          "    vec4 color;\n"
4396                                          "    uint firstVertex;\n"
4397                                          "} pc;\n";
4398 
4399     // The normal pipeline will have a binding with the vertex position and will take the vertex color from the push constants.
4400     std::ostringstream vert;
4401     vert << "#version 450\n"
4402          << "\n"
4403          << pushConstantDecl << "layout (location=0) out vec4 outColor;\n"
4404          << "layout (location=0) in  vec4 inPos;\n"
4405          << "\n"
4406          << "void main ()\n"
4407          << "{\n"
4408          << "    gl_Position = inPos;\n"
4409          << "    outColor    = pc.color;\n"
4410          << "}\n";
4411     programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
4412 
4413     // The mesh pipeline will emit a quad based on the first vertex as indicated by the push constants, using the push constant color as well.
4414     std::ostringstream mesh;
4415     mesh << "#version 450\n"
4416          << "#extension GL_NV_mesh_shader : enable\n"
4417          << "\n"
4418          << pushConstantDecl << "\n"
4419          << "layout (local_size_x=2) in;\n"
4420          << "layout (triangles) out;\n"
4421          << "layout (max_vertices=4, max_primitives=2) out;\n"
4422          << "\n"
4423          << "layout (location=0) out vec4 outColor[];\n"
4424          << "\n"
4425          << "layout (set=0, binding=0) readonly buffer VertexBlock {\n"
4426          << "    vec4 positions[];\n"
4427          << "} vertexData;\n"
4428          << "\n"
4429          << "void main ()\n"
4430          << "{\n"
4431          << "    // Emit 4 vertices starting at firstVertex, 2 per invocation.\n"
4432          << "    gl_PrimitiveCountNV = 2u;\n"
4433          << "    \n"
4434          << "    const uint localVertexOffset = 2u * gl_LocalInvocationIndex;\n"
4435          << "    const uint firstLocalVertex  = pc.firstVertex + localVertexOffset;\n"
4436          << "    const uint localIndexOffset  = 3u * gl_LocalInvocationIndex;\n"
4437          << "\n"
4438          << "    for (uint i = 0; i < 2; ++i)\n"
4439          << "    {\n"
4440          << "        gl_MeshVerticesNV[localVertexOffset + i].gl_Position = vertexData.positions[firstLocalVertex + "
4441             "i];\n"
4442          << "        outColor[localVertexOffset + i] = pc.color;\n"
4443          << "    }\n"
4444          << "\n"
4445          << "    // Emit 2 primitives, 1 per invocation.\n"
4446          << "    const uint indices[] = uint[](0, 1, 2, 2, 1, 3);\n"
4447          << "\n"
4448          << "    for (uint i = 0; i < 3; ++i)\n"
4449          << "    {\n"
4450          << "        const uint pos = localIndexOffset + i;\n"
4451          << "        gl_PrimitiveIndicesNV[pos] = indices[pos];\n"
4452          << "    }\n"
4453          << "}\n";
4454     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
4455 }
4456 
testMixedPipelines(Context & context)4457 tcu::TestStatus testMixedPipelines(Context &context)
4458 {
4459     const auto &vkd   = context.getDeviceInterface();
4460     const auto device = context.getDevice();
4461     auto &alloc       = context.getDefaultAllocator();
4462     const auto queue  = context.getUniversalQueue();
4463     const auto qIndex = context.getUniversalQueueFamilyIndex();
4464 
4465     const auto colorFormat = getOutputFormat();
4466     const auto colorExtent = makeExtent3D(32u, 32u, 1u);
4467     const auto colorUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
4468     const auto tcuFormat   = mapVkFormat(colorFormat);
4469     const tcu::IVec3 iExtent(static_cast<int>(colorExtent.width), static_cast<int>(colorExtent.height),
4470                              static_cast<int>(colorExtent.depth));
4471     const tcu::Vec4 clearValue(0.0f, 0.0f, 0.0f, 1.0f);
4472 
4473     // Divide the image in 4 quadrants and emit a "full-screen" quad (2 triangles) in each quadrant, using a mesh or normal pipeline.
4474     // Replicate a standard quad 4 times with different offsets in X and Y for each quadrant.
4475 
4476     // Triangle vertices for a single full-screen quad.
4477     const std::vector<tcu::Vec4> stdQuad{
4478         tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f),
4479         tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),
4480         tcu::Vec4(0.0f, 1.0f, 0.0f, 1.0f),
4481         tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
4482     };
4483 
4484     // Offsets for each quadrant.
4485     const std::vector<tcu::Vec4> quadrantOffsets{
4486         tcu::Vec4(-1.0f, -1.0f, 0.0f, 0.0f), // Top left.
4487         tcu::Vec4(0.0f, -1.0f, 0.0f, 0.0f),  // Top right.
4488         tcu::Vec4(-1.0f, 0.0f, 0.0f, 0.0f),  // Bottom left.
4489         tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f),   // Bottom right.
4490     };
4491 
4492     // Colors for each quadrant.
4493     const std::vector<tcu::Vec4> quadrantColors{
4494         tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f),
4495         tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
4496         tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f),
4497         tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f),
4498     };
4499 
4500     DE_ASSERT(quadrantOffsets.size() == quadrantColors.size());
4501 
4502     // Fill the vertex buffer.
4503     const auto numVertices = stdQuad.size() * quadrantOffsets.size();
4504     std::vector<tcu::Vec4> vertexBufferSrc;
4505 
4506     vertexBufferSrc.reserve(numVertices);
4507     for (size_t quadrantIdx = 0; quadrantIdx < quadrantOffsets.size(); ++quadrantIdx)
4508     {
4509         const auto &quadrantOffset = quadrantOffsets[quadrantIdx];
4510 
4511         for (size_t vertexIdx = 0; vertexIdx < stdQuad.size(); ++vertexIdx)
4512         {
4513             const tcu::Vec4 pos = stdQuad[vertexIdx] + quadrantOffset;
4514             vertexBufferSrc.push_back(pos);
4515         }
4516     }
4517 
4518     const auto vertexBufferSize  = de::dataSize(vertexBufferSrc);
4519     const auto vertexBufferUsage = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
4520     const auto vertexBufferInfo  = makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
4521     BufferWithMemory vertexBuffer(vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
4522     auto &vertexBufferAlloc     = vertexBuffer.getAllocation();
4523     tcu::Vec4 *vertexBufferData = reinterpret_cast<tcu::Vec4 *>(vertexBufferAlloc.getHostPtr());
4524 
4525     deMemcpy(vertexBufferData, vertexBufferSrc.data(), vertexBufferSize);
4526     flushAlloc(vkd, device, vertexBufferAlloc);
4527 
4528     // Index buffer, only used for the classic pipeline.
4529     const std::vector<uint32_t> vertexIndices{0u, 1u, 2u, 2u, 1u, 3u};
4530 
4531     const auto indexBufferSize  = de::dataSize(vertexIndices);
4532     const auto indexBufferUsage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
4533     const auto indexBufferInfo  = makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
4534 
4535     BufferWithMemory indexBuffer(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible);
4536     auto &indexBufferAlloc = indexBuffer.getAllocation();
4537     void *indexBufferData  = indexBufferAlloc.getHostPtr();
4538 
4539     deMemcpy(indexBufferData, vertexIndices.data(), indexBufferSize);
4540     flushAlloc(vkd, device, indexBufferAlloc);
4541 
4542     // Color attachment.
4543     const VkImageCreateInfo colorAttachmentInfo = {
4544         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
4545         nullptr,                             // const void* pNext;
4546         0u,                                  // VkImageCreateFlags flags;
4547         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
4548         colorFormat,                         // VkFormat format;
4549         colorExtent,                         // VkExtent3D extent;
4550         1u,                                  // uint32_t mipLevels;
4551         1u,                                  // uint32_t arrayLayers;
4552         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
4553         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
4554         colorUsage,                          // VkImageUsageFlags usage;
4555         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
4556         0u,                                  // uint32_t queueFamilyIndexCount;
4557         nullptr,                             // const uint32_t* pQueueFamilyIndices;
4558         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
4559     };
4560     ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentInfo, MemoryRequirement::Any);
4561     const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4562     const auto colorView =
4563         makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
4564 
4565     // Verification buffer.
4566     const auto verificationBufferSize   = tcu::getPixelSize(tcuFormat) * iExtent.x() * iExtent.y() * iExtent.z();
4567     const auto verificationBufferSizeSz = static_cast<VkDeviceSize>(verificationBufferSize);
4568     const auto verificationBufferInfo =
4569         makeBufferCreateInfo(verificationBufferSizeSz, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4570     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4571     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
4572     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
4573 
4574     // Render pass and framebuffer.
4575     const auto renderPass = makeRenderPass(vkd, device, colorFormat);
4576     const auto framebuffer =
4577         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), colorExtent.width, colorExtent.height);
4578 
4579     // Push constant range.
4580     struct PushConstantBlock
4581     {
4582         tcu::Vec4 color;
4583         uint32_t firstVertex;
4584     };
4585 
4586     const auto pcSize   = static_cast<uint32_t>(sizeof(PushConstantBlock));
4587     const auto pcStages = (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_MESH_BIT_EXT);
4588     const auto pcRange  = makePushConstantRange(pcStages, 0u, pcSize);
4589 
4590     // No descriptor set layout for the classic pipeline.
4591     // Descriptor set layout for the mesh pipeline using the vertex buffer.
4592     DescriptorSetLayoutBuilder dsLayoutBuilder;
4593     dsLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_EXT);
4594     const auto meshDSLayout = dsLayoutBuilder.build(vkd, device);
4595 
4596     // Pipeline layout for the classic pipeline.
4597     const auto classicPipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, 1u, &pcRange);
4598 
4599     // Pipeline layout for the mesh pipeline.
4600     const auto meshPipelineLayout = makePipelineLayout(vkd, device, 1u, &meshDSLayout.get(), 1u, &pcRange);
4601 
4602     // Descriptor pool and set with the vertex buffer.
4603     DescriptorPoolBuilder poolBuilder;
4604     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
4605     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4606     const auto meshDescriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), meshDSLayout.get());
4607 
4608     DescriptorSetUpdateBuilder updateBuilder;
4609     const auto vertexBufferDescInfo = makeDescriptorBufferInfo(vertexBuffer.get(), 0ull, vertexBufferSize);
4610     updateBuilder.writeSingle(meshDescriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
4611                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &vertexBufferDescInfo);
4612     updateBuilder.update(vkd, device);
4613 
4614     // Shaders and pipelines.
4615     const auto &binaries  = context.getBinaryCollection();
4616     const auto vertModule = createShaderModule(vkd, device, binaries.get("vert"));
4617     const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
4618     const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
4619 
4620     const std::vector<VkViewport> viewports(1u, makeViewport(colorExtent));
4621     const std::vector<VkRect2D> scissors(1u, makeRect2D(colorExtent));
4622 
4623     const auto classicPipeline =
4624         makeGraphicsPipeline(vkd, device, classicPipelineLayout.get(), vertModule.get(), DE_NULL, DE_NULL, DE_NULL,
4625                              fragModule.get(), renderPass.get(), viewports, scissors);
4626 
4627     const auto meshPipeline = makeGraphicsPipeline(vkd, device, meshPipelineLayout.get(), DE_NULL, meshModule.get(),
4628                                                    fragModule.get(), renderPass.get(), viewports, scissors);
4629 
4630     // Command pool and buffer.
4631     const auto cmdPool      = makeCommandPool(vkd, device, qIndex);
4632     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4633     const auto cmdBuffer    = cmdBufferPtr.get();
4634 
4635     beginCommandBuffer(vkd, cmdBuffer);
4636     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearValue);
4637 
4638     // Draw a triangle quad in each of the 4 image quadrants.
4639     PushConstantBlock pcData;
4640 
4641     for (size_t quadrantIdx = 0; quadrantIdx < quadrantColors.size(); ++quadrantIdx)
4642     {
4643         pcData.color              = quadrantColors[quadrantIdx];
4644         pcData.firstVertex        = static_cast<uint32_t>(quadrantIdx * stdQuad.size());
4645         const auto vOffset        = static_cast<VkDeviceSize>(pcData.firstVertex * sizeof(tcu::Vec4));
4646         const bool isMeshQuadrant = (quadrantIdx % 2u == 0u);
4647 
4648         if (isMeshQuadrant)
4649         {
4650             vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
4651             vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipelineLayout.get(), 0u, 1u,
4652                                       &meshDescriptorSet.get(), 0u, nullptr);
4653             vkd.cmdPushConstants(cmdBuffer, meshPipelineLayout.get(), pcStages, 0u, pcSize, &pcData);
4654             vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
4655         }
4656         else
4657         {
4658             vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, classicPipeline.get());
4659             vkd.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &vertexBuffer.get(), &vOffset);
4660             vkd.cmdBindIndexBuffer(cmdBuffer, indexBuffer.get(), 0ull, VK_INDEX_TYPE_UINT32);
4661             vkd.cmdPushConstants(cmdBuffer, classicPipelineLayout.get(), pcStages, 0u, pcSize, &pcData);
4662             vkd.cmdDrawIndexed(cmdBuffer, static_cast<uint32_t>(vertexIndices.size()), 1u, 0u, 0, 0u);
4663         }
4664     }
4665 
4666     endRenderPass(vkd, cmdBuffer);
4667 
4668     copyImageToBuffer(vkd, cmdBuffer, colorAttachment.get(), verificationBuffer.get(),
4669                       tcu::IVec2(iExtent.x(), iExtent.y()));
4670     endCommandBuffer(vkd, cmdBuffer);
4671     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4672 
4673     invalidateAlloc(vkd, device, verificationBufferAlloc);
4674 
4675     // Prepare a reference image with the quadrant colors.
4676     tcu::TextureLevel refLevel(tcuFormat, iExtent.x(), iExtent.y(), iExtent.z());
4677     auto refAccess = refLevel.getAccess();
4678     const tcu::Vec4 halfSize(static_cast<float>(iExtent.x()) / 2.0f, static_cast<float>(iExtent.y()) / 2.0f, 0, 0);
4679     const tcu::Vec4 fbOffset(-1.0f, -1.0f, 0.0f, 0.0f);
4680 
4681     for (size_t quadrantIdx = 0; quadrantIdx < quadrantOffsets.size(); ++quadrantIdx)
4682     {
4683         const auto &offset   = quadrantOffsets[quadrantIdx];
4684         const auto absOffset = (offset - fbOffset) * halfSize;
4685         const auto subregion =
4686             tcu::getSubregion(refAccess, static_cast<int>(absOffset.x()), static_cast<int>(absOffset.y()),
4687                               static_cast<int>(halfSize.x()), static_cast<int>(halfSize.y()));
4688 
4689         tcu::clear(subregion, quadrantColors.at(quadrantIdx));
4690     }
4691 
4692     auto &log = context.getTestContext().getLog();
4693     const tcu::ConstPixelBufferAccess resAccess(tcuFormat, iExtent, verificationBufferData);
4694     const tcu::Vec4 threshold(0.0f, 0.0f, 0.0f,
4695                               0.0f); // The chosen colors should need no threshold. They can be represented exactly.
4696 
4697     if (!tcu::floatThresholdCompare(log, "TestResult", "", refAccess, resAccess, threshold, tcu::COMPARE_LOG_ON_ERROR))
4698         TCU_FAIL("Check log for details");
4699 
4700     return tcu::TestStatus::pass("Pass");
4701 }
4702 
4703 // Test reading the gl_TaskCountNV and gl_PrimitiveCountNV built-ins from several invocations.
4704 class CountReadCase : public MeshShaderMiscCase
4705 {
4706 public:
CountReadCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)4707     CountReadCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
4708         : MeshShaderMiscCase(testCtx, name, std::move(params))
4709     {
4710     }
4711 
4712     void initPrograms(vk::SourceCollections &programCollection) const override;
4713     TestInstance *createInstance(Context &context) const override;
4714 
4715     static constexpr uint32_t kLocalSize = 32u;
4716 };
4717 
4718 class CountReadInstance : public MeshShaderMiscInstance
4719 {
4720 public:
CountReadInstance(Context & context,const MiscTestParams * params)4721     CountReadInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
4722     {
4723     }
4724 
4725     void generateReferenceLevel() override;
4726 };
4727 
createInstance(Context & context) const4728 TestInstance *CountReadCase::createInstance(Context &context) const
4729 {
4730     return new CountReadInstance(context, m_params.get());
4731 }
4732 
generateReferenceLevel()4733 void CountReadInstance::generateReferenceLevel()
4734 {
4735     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
4736 }
4737 
initPrograms(vk::SourceCollections & programCollection) const4738 void CountReadCase::initPrograms(vk::SourceCollections &programCollection) const
4739 {
4740     DE_ASSERT(m_params->needsTaskShader());
4741     DE_ASSERT(m_params->height == m_params->meshCount);
4742     DE_ASSERT(m_params->width == kLocalSize);
4743 
4744     std::ostringstream taskDataDeclStream;
4745     taskDataDeclStream << "taskNV TaskData {\n"
4746                        << "    vec4 color[" << kLocalSize << "];\n"
4747                        << "} td;\n";
4748     const auto taskDataDecl = taskDataDeclStream.str();
4749 
4750     std::ostringstream task;
4751     task << "#version 450\n"
4752          << "#extension GL_NV_mesh_shader : enable\n"
4753          << "\n"
4754          << "layout(local_size_x=" << kLocalSize << ") in;\n"
4755          << "\n"
4756          << "out " << taskDataDecl << "void main ()\n"
4757          << "{\n"
4758          << "    gl_TaskCountNV = 0u;\n"
4759          << "    if (gl_LocalInvocationIndex == 0u) {\n"
4760          << "        gl_TaskCountNV = " << m_params->meshCount << ";\n"
4761          << "    }\n"
4762          << "    memoryBarrierShared();\n"
4763          << "    barrier();\n"
4764          << "    td.color[gl_LocalInvocationIndex] = ((gl_TaskCountNV == " << m_params->meshCount
4765          << ") ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
4766          << "}\n";
4767     programCollection.glslSources.add("task") << glu::TaskSource(task.str());
4768 
4769     std::ostringstream mesh;
4770     mesh << "#version 450\n"
4771          << "#extension GL_NV_mesh_shader : enable\n"
4772          << "\n"
4773          << "in " << taskDataDecl << "\n"
4774          << "layout (local_size_x=" << kLocalSize << ") in;\n"
4775          << "layout (points) out;\n"
4776          << "layout (max_vertices=" << kLocalSize << ", max_primitives=" << kLocalSize << ") out;\n"
4777          << "\n"
4778          << "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
4779          << "\n"
4780          << "void main ()\n"
4781          << "{\n"
4782          << "    gl_PrimitiveCountNV = 0u;\n"
4783          << "    if (gl_LocalInvocationIndex == 0u) {\n"
4784          << "        gl_PrimitiveCountNV = " << kLocalSize << ";\n"
4785          << "    }\n"
4786          << "    memoryBarrierShared();\n"
4787          << "    barrier();\n"
4788          << "\n"
4789          << "    const vec4  color  = ((gl_PrimitiveCountNV == " << kLocalSize
4790          << ") ? td.color[gl_LocalInvocationIndex] : vec4(0.0, 0.0, 0.0, 1.0));\n"
4791          << "    const float xCoord = (((float(gl_LocalInvocationIndex) + 0.5) / " << m_params->width
4792          << ") * 2.0 - 1.0);\n"
4793          << "    const float yCoord = (((float(gl_WorkGroupID.x) + 0.5) / " << m_params->height << ") * 2.0 - 1.0);\n"
4794          << "\n"
4795          << "    gl_MeshVerticesNV[gl_LocalInvocationIndex].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
4796          << "    gl_PrimitiveIndicesNV[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
4797          << "    pointColor[gl_LocalInvocationIndex] = color;\n"
4798          << "}\n";
4799     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
4800 
4801     // Default fragment shader.
4802     MeshShaderMiscCase::initPrograms(programCollection);
4803 }
4804 
4805 } // anonymous namespace
4806 
createMeshShaderMiscTests(tcu::TestContext & testCtx)4807 tcu::TestCaseGroup *createMeshShaderMiscTests(tcu::TestContext &testCtx)
4808 {
4809     GroupPtr miscTests(new tcu::TestCaseGroup(testCtx, "misc"));
4810 
4811     {
4812         ParamsPtr paramsPtr(new MiscTestParams(
4813             /*taskCount*/ tcu::just(2u),
4814             /*meshCount*/ 2u,
4815             /*width*/ 8u,
4816             /*height*/ 8u));
4817 
4818         // Pass a complex structure from the task to the mesh shader
4819         miscTests->addChild(new ComplexTaskDataCase(testCtx, "complex_task_data", std::move(paramsPtr)));
4820     }
4821 
4822     {
4823         ParamsPtr paramsPtr(new MiscTestParams(
4824             /*taskCount*/ tcu::Nothing,
4825             /*meshCount*/ 1u,
4826             /*width*/ 5u,    // Use an odd value so there's a pixel in the exact center.
4827             /*height*/ 7u)); // Idem.
4828 
4829         // Draw a single point
4830         miscTests->addChild(new SinglePointCase(testCtx, "single_point", std::move(paramsPtr)));
4831     }
4832 
4833     {
4834         ParamsPtr paramsPtr(new MiscTestParams(
4835             /*taskCount*/ tcu::Nothing,
4836             /*meshCount*/ 1u,
4837             /*width*/ 8u,
4838             /*height*/ 5u)); // Use an odd value so there's a center line.
4839 
4840         // Draw a single line
4841         miscTests->addChild(new SingleLineCase(testCtx, "single_line", std::move(paramsPtr)));
4842     }
4843 
4844     {
4845         ParamsPtr paramsPtr(new MiscTestParams(
4846             /*taskCount*/ tcu::Nothing,
4847             /*meshCount*/ 1u,
4848             /*width*/ 5u,    // Use an odd value so there's a pixel in the exact center.
4849             /*height*/ 7u)); // Idem.
4850 
4851         // Draw a single triangle
4852         miscTests->addChild(new SingleTriangleCase(testCtx, "single_triangle", std::move(paramsPtr)));
4853     }
4854 
4855     {
4856         ParamsPtr paramsPtr(new MiscTestParams(
4857             /*taskCount*/ tcu::Nothing,
4858             /*meshCount*/ 1u,
4859             /*width*/ 16u,
4860             /*height*/ 16u));
4861 
4862         // Draw the maximum number of points
4863         miscTests->addChild(new MaxPointsCase(testCtx, "max_points", std::move(paramsPtr)));
4864     }
4865 
4866     {
4867         ParamsPtr paramsPtr(new MiscTestParams(
4868             /*taskCount*/ tcu::Nothing,
4869             /*meshCount*/ 1u,
4870             /*width*/ 1u,
4871             /*height*/ 1020u));
4872 
4873         // Draw the maximum number of lines
4874         miscTests->addChild(new MaxLinesCase(testCtx, "max_lines", std::move(paramsPtr)));
4875     }
4876 
4877     {
4878         ParamsPtr paramsPtr(new MiscTestParams(
4879             /*taskCount*/ tcu::Nothing,
4880             /*meshCount*/ 1u,
4881             /*width*/ 512u,
4882             /*height*/ 512u));
4883 
4884         // Draw the maximum number of triangles
4885         miscTests->addChild(new MaxTrianglesCase(testCtx, "max_triangles", std::move(paramsPtr)));
4886     }
4887 
4888     {
4889         ParamsPtr paramsPtr(new MiscTestParams(
4890             /*taskCount*/ tcu::just(65535u),
4891             /*meshCount*/ 1u,
4892             /*width*/ 1360u,
4893             /*height*/ 1542u));
4894 
4895         // Generate a large number of task work groups
4896         miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_task_work_groups", std::move(paramsPtr)));
4897     }
4898 
4899     {
4900         ParamsPtr paramsPtr(new MiscTestParams(
4901             /*taskCount*/ tcu::Nothing,
4902             /*meshCount*/ 65535u,
4903             /*width*/ 1360u,
4904             /*height*/ 1542u));
4905 
4906         // Generate a large number of mesh work groups
4907         miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_mesh_work_groups", std::move(paramsPtr)));
4908     }
4909 
4910     {
4911         ParamsPtr paramsPtr(new MiscTestParams(
4912             /*taskCount*/ tcu::just(512u),
4913             /*meshCount*/ 512u,
4914             /*width*/ 4096u,
4915             /*height*/ 2048u));
4916 
4917         // Generate a large number of task and mesh work groups
4918         miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_task_mesh_work_groups", std::move(paramsPtr)));
4919     }
4920 
4921     {
4922         const PrimitiveType types[] = {
4923             PrimitiveType::POINTS,
4924             PrimitiveType::LINES,
4925             PrimitiveType::TRIANGLES,
4926         };
4927 
4928         for (int i = 0; i < 2; ++i)
4929         {
4930             const bool extraWrites = (i > 0);
4931 
4932             for (const auto primType : types)
4933             {
4934                 std::unique_ptr<NoPrimitivesParams> params(new NoPrimitivesParams(
4935                     /*taskCount*/ (extraWrites ? tcu::just(1u) : tcu::Nothing),
4936                     /*meshCount*/ 1u,
4937                     /*width*/ 16u,
4938                     /*height*/ 16u,
4939                     /*primitiveType*/ primType));
4940 
4941                 ParamsPtr paramsPtr(params.release());
4942                 const auto primName    = primitiveTypeName(primType);
4943                 const std::string name = "no_" + primName + (extraWrites ? "_extra_writes" : "");
4944 
4945                 miscTests->addChild(extraWrites ?
4946                                         (new NoPrimitivesExtraWritesCase(testCtx, name, std::move(paramsPtr))) :
4947                                         (new NoPrimitivesCase(testCtx, name, std::move(paramsPtr))));
4948             }
4949         }
4950     }
4951 
4952     {
4953         for (int i = 0; i < 2; ++i)
4954         {
4955             const bool useTaskShader = (i == 0);
4956 
4957             ParamsPtr paramsPtr(new MiscTestParams(
4958                 /*taskCount*/ (useTaskShader ? tcu::just(1u) : tcu::Nothing),
4959                 /*meshCount*/ 1u,
4960                 /*width*/ 1u,
4961                 /*height*/ 1u));
4962 
4963             const std::string shader = (useTaskShader ? "task" : "mesh");
4964             const std::string name   = "barrier_in_" + shader;
4965 
4966             miscTests->addChild(new SimpleBarrierCase(testCtx, name, std::move(paramsPtr)));
4967         }
4968     }
4969 
4970     {
4971         const struct
4972         {
4973             MemoryBarrierType memBarrierType;
4974             std::string caseName;
4975         } barrierTypes[] = {
4976             {MemoryBarrierType::SHARED, "memory_barrier_shared"},
4977             {MemoryBarrierType::GROUP, "group_memory_barrier"},
4978         };
4979 
4980         for (const auto &barrierCase : barrierTypes)
4981         {
4982             for (int i = 0; i < 2; ++i)
4983             {
4984                 const bool useTaskShader = (i == 0);
4985 
4986                 std::unique_ptr<MemoryBarrierParams> paramsPtr(new MemoryBarrierParams(
4987                     /*taskCount*/ (useTaskShader ? tcu::just(1u) : tcu::Nothing),
4988                     /*meshCount*/ 1u,
4989                     /*width*/ 1u,
4990                     /*height*/ 1u,
4991                     /*memBarrierType*/ barrierCase.memBarrierType));
4992 
4993                 const std::string shader = (useTaskShader ? "task" : "mesh");
4994                 const std::string name   = barrierCase.caseName + "_in_" + shader;
4995 
4996                 miscTests->addChild(new MemoryBarrierCase(testCtx, name, std::move(paramsPtr)));
4997             }
4998         }
4999     }
5000 
5001     {
5002         for (int i = 0; i < 2; ++i)
5003         {
5004             const bool useTaskShader = (i > 0);
5005             const auto name          = std::string("custom_attributes") + (useTaskShader ? "_and_task_shader" : "");
5006 
5007             ParamsPtr paramsPtr(new MiscTestParams(
5008                 /*taskCount*/ (useTaskShader ? tcu::just(1u) : tcu::Nothing),
5009                 /*meshCount*/ 1u,
5010                 /*width*/ 32u,
5011                 /*height*/ 32u));
5012 
5013             miscTests->addChild(new CustomAttributesCase(testCtx, name, std::move(paramsPtr)));
5014         }
5015     }
5016 
5017     {
5018         for (int i = 0; i < 2; ++i)
5019         {
5020             const bool useTaskShader = (i > 0);
5021             const auto name          = std::string("push_constant") + (useTaskShader ? "_and_task_shader" : "");
5022 
5023             ParamsPtr paramsPtr(new MiscTestParams(
5024                 /*taskCount*/ (useTaskShader ? tcu::just(1u) : tcu::Nothing),
5025                 /*meshCount*/ 1u,
5026                 /*width*/ 16u,
5027                 /*height*/ 16u));
5028 
5029             miscTests->addChild(new PushConstantCase(testCtx, name, std::move(paramsPtr)));
5030         }
5031     }
5032 
5033     {
5034         ParamsPtr paramsPtr(new MaximizeThreadsParams(
5035             /*taskCount*/ tcu::Nothing,
5036             /*meshCount*/ 1u,
5037             /*width*/ 128u,
5038             /*height*/ 1u,
5039             /*localSize*/ 32u,
5040             /*numVertices*/ 128u,
5041             /*numPrimitives*/ 256u));
5042 
5043         // Use a large number of primitives compared to other sizes
5044         miscTests->addChild(new MaximizePrimitivesCase(testCtx, "maximize_primitives", std::move(paramsPtr)));
5045     }
5046 
5047     {
5048         ParamsPtr paramsPtr(new MaximizeThreadsParams(
5049             /*taskCount*/ tcu::Nothing,
5050             /*meshCount*/ 1u,
5051             /*width*/ 64u,
5052             /*height*/ 1u,
5053             /*localSize*/ 32u,
5054             /*numVertices*/ 256u,
5055             /*numPrimitives*/ 128u));
5056 
5057         // Use a large number of vertices compared to other sizes
5058         miscTests->addChild(new MaximizeVerticesCase(testCtx, "maximize_vertices", std::move(paramsPtr)));
5059     }
5060 
5061     {
5062         const uint32_t kInvocationCases[] = {32u, 64u, 128u, 256u};
5063 
5064         for (const auto &invocationCase : kInvocationCases)
5065         {
5066             const auto invsStr   = std::to_string(invocationCase);
5067             const auto numPixels = invocationCase / 2u;
5068 
5069             ParamsPtr paramsPtr(new MaximizeThreadsParams(
5070                 /*taskCount*/ tcu::Nothing,
5071                 /*meshCount*/ 1u,
5072                 /*width*/ numPixels,
5073                 /*height*/ 1u,
5074                 /*localSize*/ invocationCase,
5075                 /*numVertices*/ numPixels,
5076                 /*numPrimitives*/ numPixels));
5077 
5078             // Use a large number of invocations compared to other sizes
5079             miscTests->addChild(
5080                 new MaximizeInvocationsCase(testCtx, "maximize_invocations_" + invsStr, std::move(paramsPtr)));
5081         }
5082     }
5083 
5084     if (false) // This test does not work and the spec is not clear that it should.
5085     {
5086         ParamsPtr paramsPtr(new MiscTestParams(
5087             /*taskCount*/ tcu::just(1u),
5088             /*meshCount*/ 128u,
5089             /*width*/ 32u,
5090             /*height*/ 128u));
5091 
5092         // Attempt to read gl_TaskCountNV and gl_PrimitiveCountNV from multiple invocations
5093         miscTests->addChild(new CountReadCase(testCtx, "count_reads", std::move(paramsPtr)));
5094     }
5095 
5096     // Mix classic and mesh shader pipelines in the same render pass
5097     addFunctionCaseWithPrograms(miscTests.get(), "mixed_pipelines", checkMeshSupport, initMixedPipelinesPrograms,
5098                                 testMixedPipelines);
5099 
5100     return miscTests.release();
5101 }
5102 
createMeshShaderInOutTests(tcu::TestContext & testCtx)5103 tcu::TestCaseGroup *createMeshShaderInOutTests(tcu::TestContext &testCtx)
5104 {
5105     // Mesh Shader Tests checking Input/Output interfaces
5106     GroupPtr inOutTests(new tcu::TestCaseGroup(testCtx, "in_out"));
5107 
5108     const struct
5109     {
5110         bool i64;
5111         bool f64;
5112         bool i16;
5113         bool f16;
5114         const char *name;
5115     } requiredFeatures[] = {
5116         // Restrict the number of combinations to avoid creating too many tests.
5117         //    i64        f64        i16        f16        name
5118         {false, false, false, false, "32_bits_only"}, {true, false, false, false, "with_i64"},
5119         {false, true, false, false, "with_f64"},      {true, true, false, false, "all_but_16_bits"},
5120         {false, false, true, false, "with_i16"},      {false, false, false, true, "with_f16"},
5121         {true, true, true, true, "all_types"},
5122     };
5123 
5124     Owner ownerCases[]                 = {Owner::VERTEX, Owner::PRIMITIVE};
5125     DataType dataTypeCases[]           = {DataType::FLOAT, DataType::INTEGER};
5126     BitWidth bitWidthCases[]           = {BitWidth::B64, BitWidth::B32, BitWidth::B16};
5127     DataDim dataDimCases[]             = {DataDim::SCALAR, DataDim::VEC2, DataDim::VEC3, DataDim::VEC4};
5128     Interpolation interpolationCases[] = {Interpolation::NORMAL, Interpolation::FLAT};
5129     de::Random rnd(1636723398u);
5130 
5131     for (const auto &reqs : requiredFeatures)
5132     {
5133         GroupPtr reqsGroup(new tcu::TestCaseGroup(testCtx, reqs.name));
5134 
5135         // Generate the variable list according to the group requirements.
5136         IfaceVarVecPtr varsPtr(new IfaceVarVec);
5137 
5138         for (const auto &ownerCase : ownerCases)
5139             for (const auto &dataTypeCase : dataTypeCases)
5140                 for (const auto &bitWidthCase : bitWidthCases)
5141                     for (const auto &dataDimCase : dataDimCases)
5142                         for (const auto &interpolationCase : interpolationCases)
5143                         {
5144                             if (dataTypeCase == DataType::FLOAT)
5145                             {
5146                                 if (bitWidthCase == BitWidth::B64 && !reqs.f64)
5147                                     continue;
5148                                 if (bitWidthCase == BitWidth::B16 && !reqs.f16)
5149                                     continue;
5150                             }
5151                             else if (dataTypeCase == DataType::INTEGER)
5152                             {
5153                                 if (bitWidthCase == BitWidth::B64 && !reqs.i64)
5154                                     continue;
5155                                 if (bitWidthCase == BitWidth::B16 && !reqs.i16)
5156                                     continue;
5157                             }
5158 
5159                             if (dataTypeCase == DataType::INTEGER && interpolationCase == Interpolation::NORMAL)
5160                                 continue;
5161 
5162                             if (ownerCase == Owner::PRIMITIVE && interpolationCase == Interpolation::NORMAL)
5163                                 continue;
5164 
5165                             if (dataTypeCase == DataType::FLOAT && bitWidthCase == BitWidth::B64 &&
5166                                 interpolationCase == Interpolation::NORMAL)
5167                                 continue;
5168 
5169                             for (uint32_t idx = 0u; idx < IfaceVar::kVarsPerType; ++idx)
5170                                 varsPtr->push_back(IfaceVar(ownerCase, dataTypeCase, bitWidthCase, dataDimCase,
5171                                                             interpolationCase, idx));
5172                         }
5173 
5174         // Generating all permutations of the variables above would mean millions of tests, so we just generate some pseudorandom permutations.
5175         constexpr uint32_t kPermutations = 40u;
5176         for (uint32_t combIdx = 0; combIdx < kPermutations; ++combIdx)
5177         {
5178             const auto caseName = "permutation_" + std::to_string(combIdx);
5179             GroupPtr rndGroup(new tcu::TestCaseGroup(testCtx, caseName.c_str()));
5180 
5181             // Duplicate and shuffle vector.
5182             IfaceVarVecPtr permutVec(new IfaceVarVec(*varsPtr));
5183             rnd.shuffle(begin(*permutVec), end(*permutVec));
5184 
5185             // Cut the vector short to the usable number of locations.
5186             {
5187                 uint32_t usedLocations = 0u;
5188                 size_t vectorEnd       = 0u;
5189                 auto &varVec           = *permutVec;
5190 
5191                 for (size_t i = 0; i < varVec.size(); ++i)
5192                 {
5193                     vectorEnd          = i;
5194                     const auto varSize = varVec[i].getLocationSize();
5195                     if (usedLocations + varSize > InterfaceVariablesCase::kMaxLocations)
5196                         break;
5197                     usedLocations += varSize;
5198                 }
5199 
5200                 varVec.resize(vectorEnd);
5201             }
5202 
5203             for (int i = 0; i < 2; ++i)
5204             {
5205                 const bool useTaskShader = (i > 0);
5206                 const auto name          = (useTaskShader ? "task_mesh" : "mesh_only");
5207 
5208                 // Duplicate vector for this particular case so both variants have the same shuffle.
5209                 IfaceVarVecPtr paramsVec(new IfaceVarVec(*permutVec));
5210 
5211                 ParamsPtr paramsPtr(new InterfaceVariableParams(
5212                     /*taskCount*/ (useTaskShader ? tcu::just(1u) : tcu::Nothing),
5213                     /*meshCount*/ 1u,
5214                     /*width*/ 8u,
5215                     /*height*/ 8u,
5216                     /*useInt64*/ reqs.i64,
5217                     /*useFloat64*/ reqs.f64,
5218                     /*useInt16*/ reqs.i16,
5219                     /*useFloat16*/ reqs.f16,
5220                     /*vars*/ std::move(paramsVec)));
5221 
5222                 rndGroup->addChild(new InterfaceVariablesCase(testCtx, name, std::move(paramsPtr)));
5223             }
5224 
5225             reqsGroup->addChild(rndGroup.release());
5226         }
5227 
5228         inOutTests->addChild(reqsGroup.release());
5229     }
5230 
5231     return inOutTests.release();
5232 }
5233 
5234 } // namespace MeshShader
5235 } // namespace vkt
5236