1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2022 The Khronos Group Inc.
6  * Copyright (c) 2022 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Property Tests for VK_EXT_mesh_shader
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderPropertyTestsEXT.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktMeshShaderUtil.hpp"
28 
29 #include "vkBufferWithMemory.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkBarrierUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageWithMemory.hpp"
36 #include "vkImageUtil.hpp"
37 
38 #include "tcuTestLog.hpp"
39 #include "tcuImageCompare.hpp"
40 #include "tcuTextureUtil.hpp"
41 
42 #include "deUniquePtr.hpp"
43 
44 #include <algorithm>
45 #include <sstream>
46 #include <limits>
47 
48 namespace vkt
49 {
50 namespace MeshShader
51 {
52 
53 using namespace vk;
54 
55 namespace
56 {
57 
// Kind of storage exercised by a payload / shared memory size test.
enum class PayLoadShMemSizeType
{
    PAYLOAD       = 0, // Only the payload.
    SHARED_MEMORY = 1, // Only shared memory.
    BOTH          = 2, // Payload and shared memory together.
};
64 
65 struct PayloadShMemSizeParams
66 {
67     PayLoadShMemSizeType testType;
68 
hasPayloadvkt::MeshShader::__anon1a407c490111::PayloadShMemSizeParams69     bool hasPayload(void) const
70     {
71         return testType != PayLoadShMemSizeType::SHARED_MEMORY;
72     }
hasSharedMemoryvkt::MeshShader::__anon1a407c490111::PayloadShMemSizeParams73     bool hasSharedMemory(void) const
74     {
75         return testType != PayLoadShMemSizeType::PAYLOAD;
76     }
77 };
78 
79 using TaskPayloadShMemSizeParams = PayloadShMemSizeParams;
80 using MeshPayloadShMemSizeParams = PayloadShMemSizeParams;
81 using SpecConstVector            = std::vector<uint32_t>;
82 
83 class TaskPayloadShMemSizeCase : public vkt::TestCase
84 {
85 public:
TaskPayloadShMemSizeCase(tcu::TestContext & testCtx,const std::string & name,const TaskPayloadShMemSizeParams & params)86     TaskPayloadShMemSizeCase(tcu::TestContext &testCtx, const std::string &name,
87                              const TaskPayloadShMemSizeParams &params)
88         : vkt::TestCase(testCtx, name)
89         , m_params(params)
90     {
91     }
~TaskPayloadShMemSizeCase(void)92     virtual ~TaskPayloadShMemSizeCase(void)
93     {
94     }
95 
96     void checkSupport(Context &context) const override;
97     void initPrograms(vk::SourceCollections &programCollection) const override;
98     TestInstance *createInstance(Context &context) const override;
99 
100 protected:
101     // These depend on the context because we need the mesh shading properties to calculate them.
102     struct ParamsFromContext
103     {
104         uint32_t payloadElements;
105         uint32_t sharedMemoryElements;
106     };
107 
108     ParamsFromContext getParamsFromContext(Context &context) const;
109 
110     const TaskPayloadShMemSizeParams m_params;
111 
112     static constexpr uint32_t kElementSize      = static_cast<uint32_t>(sizeof(uint32_t));
113     static constexpr uint32_t kLocalInvocations = 128u;
114 };
115 
116 class SpecConstantInstance : public vkt::TestInstance
117 {
118 public:
SpecConstantInstance(Context & context,SpecConstVector && vec)119     SpecConstantInstance(Context &context, SpecConstVector &&vec)
120         : vkt::TestInstance(context)
121         , m_specConstants(std::move(vec))
122     {
123     }
~SpecConstantInstance(void)124     virtual ~SpecConstantInstance(void)
125     {
126     }
127 
128 protected:
129     std::vector<VkSpecializationMapEntry> makeSpecializationMap(void) const;
130     const SpecConstVector m_specConstants;
131 };
132 
makeSpecializationMap(void) const133 std::vector<VkSpecializationMapEntry> SpecConstantInstance::makeSpecializationMap(void) const
134 {
135     std::vector<VkSpecializationMapEntry> entryMap;
136     entryMap.reserve(m_specConstants.size());
137 
138     const auto constantSize = sizeof(uint32_t);
139     const auto csU32        = static_cast<uint32_t>(constantSize);
140 
141     for (size_t i = 0u; i < m_specConstants.size(); ++i)
142     {
143         const auto id = static_cast<uint32_t>(i);
144 
145         const VkSpecializationMapEntry entry = {
146             id,           // uint32_t constantID;
147             (csU32 * id), // uint32_t offset;
148             constantSize, // size_t size;
149         };
150         entryMap.push_back(entry);
151     }
152 
153     return entryMap;
154 }
155 
156 class PayloadShMemSizeInstance : public SpecConstantInstance
157 {
158 public:
PayloadShMemSizeInstance(Context & context,const TaskPayloadShMemSizeParams & params,SpecConstVector && vec)159     PayloadShMemSizeInstance(Context &context, const TaskPayloadShMemSizeParams &params, SpecConstVector &&vec)
160         : SpecConstantInstance(context, std::move(vec))
161         , m_params(params)
162     {
163     }
~PayloadShMemSizeInstance(void)164     virtual ~PayloadShMemSizeInstance(void)
165     {
166     }
167 
168     tcu::TestStatus iterate(void) override;
169 
170 protected:
171     Move<VkRenderPass> makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device);
172     const TaskPayloadShMemSizeParams m_params;
173 };
174 
checkSupport(Context & context) const175 void TaskPayloadShMemSizeCase::checkSupport(Context &context) const
176 {
177     checkTaskMeshShaderSupportEXT(context, true /*requireTask*/, true /*requireMesh*/);
178     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
179 
180     const auto &meshProperties = context.getMeshShaderPropertiesEXT();
181     const auto minSize         = kLocalInvocations * kElementSize;
182 
183     // Note: the min required values for these properties in the spec would pass these checks.
184 
185     if (meshProperties.maxTaskPayloadSize < minSize)
186         TCU_FAIL("Invalid maxTaskPayloadSize");
187 
188     if (meshProperties.maxTaskSharedMemorySize < minSize)
189         TCU_FAIL("Invalid maxTaskSharedMemorySize");
190 
191     if (meshProperties.maxTaskPayloadAndSharedMemorySize < minSize)
192         TCU_FAIL("Invalid maxTaskPayloadAndSharedMemorySize");
193 
194     if (meshProperties.maxMeshPayloadAndSharedMemorySize < minSize)
195         TCU_FAIL("Invalid maxMeshPayloadAndSharedMemorySize");
196 }
197 
getParamsFromContext(Context & context) const198 TaskPayloadShMemSizeCase::ParamsFromContext TaskPayloadShMemSizeCase::getParamsFromContext(Context &context) const
199 {
200     ParamsFromContext params;
201 
202     const auto &meshProperties = context.getMeshShaderPropertiesEXT();
203     const auto maxMeshPayloadSize =
204         std::min(meshProperties.maxMeshPayloadAndOutputMemorySize, meshProperties.maxMeshPayloadAndSharedMemorySize);
205     const auto maxPayloadElements =
206         std::min(meshProperties.maxTaskPayloadSize / kElementSize, maxMeshPayloadSize / kElementSize);
207     const auto maxShMemElements = meshProperties.maxTaskSharedMemorySize / kElementSize;
208     const auto maxTotalElements = meshProperties.maxTaskPayloadAndSharedMemorySize / kElementSize;
209 
210     if (m_params.testType == PayLoadShMemSizeType::PAYLOAD)
211     {
212         params.sharedMemoryElements = 0u;
213         params.payloadElements      = std::min(maxTotalElements, maxPayloadElements);
214     }
215     else if (m_params.testType == PayLoadShMemSizeType::SHARED_MEMORY)
216     {
217         params.payloadElements      = 0u;
218         params.sharedMemoryElements = std::min(maxTotalElements, maxShMemElements);
219     }
220     else
221     {
222         uint32_t *minPtr;
223         uint32_t minVal;
224         uint32_t *maxPtr;
225         uint32_t maxVal;
226 
227         // Divide them as evenly as possible getting them as closest as possible to maxTotalElements.
228         if (maxPayloadElements < maxShMemElements)
229         {
230             minPtr = &params.payloadElements;
231             minVal = maxPayloadElements;
232 
233             maxPtr = &params.sharedMemoryElements;
234             maxVal = maxShMemElements;
235         }
236         else
237         {
238             minPtr = &params.sharedMemoryElements;
239             minVal = maxShMemElements;
240 
241             maxPtr = &params.payloadElements;
242             maxVal = maxPayloadElements;
243         }
244 
245         *minPtr = std::min(minVal, maxTotalElements / 2u);
246         *maxPtr = std::min(maxTotalElements - (*minPtr), maxVal);
247     }
248 
249     return params;
250 }
251 
createInstance(Context & context) const252 TestInstance *TaskPayloadShMemSizeCase::createInstance(Context &context) const
253 {
254     const auto ctxParams = getParamsFromContext(context);
255     SpecConstVector specConstVec{ctxParams.payloadElements, ctxParams.sharedMemoryElements};
256 
257     return new PayloadShMemSizeInstance(context, m_params, std::move(specConstVec));
258 }
259 
initPrograms(vk::SourceCollections & programCollection) const260 void TaskPayloadShMemSizeCase::initPrograms(vk::SourceCollections &programCollection) const
261 {
262     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
263 
264     const std::string scDecl = "layout (constant_id=0) const uint payloadElements = 1u;\n"
265                                "layout (constant_id=1) const uint sharedMemoryElements = 1u;\n";
266 
267     const std::string dsDecl = "layout (set=0, binding=0, std430) buffer ResultBlock {\n"
268                                "    uint sharedOK;\n"
269                                "    uint payloadOK;\n"
270                                "} result;\n";
271 
272     std::string taskData;
273     std::string taskPayloadBody;
274     std::string meshPayloadBody;
275 
276     if (m_params.hasPayload())
277     {
278         std::ostringstream taskDataStream;
279         taskDataStream << "struct TaskData {\n"
280                        << "    uint elements[payloadElements];\n"
281                        << "};\n"
282                        << "taskPayloadSharedEXT TaskData td;\n";
283         taskData = taskDataStream.str();
284 
285         std::ostringstream taskBodyStream;
286         taskBodyStream << "    const uint payloadElementsPerInvocation = uint(ceil(float(payloadElements) / float("
287                        << kLocalInvocations << ")));\n"
288                        << "    for (uint i = 0u; i < payloadElementsPerInvocation; ++i) {\n"
289                        << "        const uint elemIdx = payloadElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
290                        << "        if (elemIdx < payloadElements) {\n"
291                        << "            td.elements[elemIdx] = elemIdx + 2000u;\n"
292                        << "        }\n"
293                        << "    }\n"
294                        << "\n";
295         taskPayloadBody = taskBodyStream.str();
296 
297         std::ostringstream meshBodyStream;
298         meshBodyStream << "    bool allOK = true;\n"
299                        << "    for (uint i = 0u; i < payloadElements; ++i) {\n"
300                        << "        if (td.elements[i] != i + 2000u) {\n"
301                        << "            allOK = false;\n"
302                        << "            break;\n"
303                        << "        }\n"
304                        << "    }\n"
305                        << "    result.payloadOK = (allOK ? 1u : 0u);\n"
306                        << "\n";
307         meshPayloadBody = meshBodyStream.str();
308     }
309     else
310     {
311         meshPayloadBody = "    result.payloadOK = 1u;\n";
312     }
313 
314     std::string sharedData;
315     std::string taskSharedDataBody;
316 
317     if (m_params.hasSharedMemory())
318     {
319         sharedData = "shared uint sharedElements[sharedMemoryElements];\n";
320 
321         std::ostringstream bodyStream;
322         bodyStream << "    const uint shMemElementsPerInvocation = uint(ceil(float(sharedMemoryElements) / float("
323                    << kLocalInvocations << ")));\n"
324                    << "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
325                    << "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
326                    << "        if (elemIdx < sharedMemoryElements) {\n"
327                    << "            sharedElements[elemIdx] = elemIdx * 2u + 1000u;\n" // Write
328                    << "        }\n"
329                    << "    }\n"
330                    << "    memoryBarrierShared();\n"
331                    << "    barrier();\n"
332                    << "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
333                    << "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
334                    << "        if (elemIdx < sharedMemoryElements) {\n"
335                    << "            const uint accessIdx = sharedMemoryElements - 1u - elemIdx;\n"
336                    << "            sharedElements[accessIdx] += accessIdx;\n" // Read+Write a different element.
337                    << "        }\n"
338                    << "    }\n"
339                    << "    memoryBarrierShared();\n"
340                    << "    barrier();\n"
341                    << "    if (gl_LocalInvocationIndex == 0u) {\n"
342                    << "        bool allOK = true;\n"
343                    << "        for (uint i = 0u; i < sharedMemoryElements; ++i) {\n"
344                    << "            if (sharedElements[i] != i*3u + 1000u) {\n"
345                    << "                allOK = false;\n"
346                    << "                break;\n"
347                    << "            }\n"
348                    << "        }\n"
349                    << "        result.sharedOK = (allOK ? 1u : 0u);\n"
350                    << "    }\n"
351                    << "\n";
352         taskSharedDataBody = bodyStream.str();
353     }
354     else
355     {
356         taskSharedDataBody = "    if (gl_LocalInvocationIndex == 0u) {\n"
357                              "        result.sharedOK = 1u;\n"
358                              "    }\n";
359     }
360 
361     std::ostringstream task;
362     task << "#version 450\n"
363          << "#extension GL_EXT_mesh_shader : enable\n"
364          << "\n"
365          << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
366          << scDecl << dsDecl << taskData << sharedData << "\n"
367          << "void main () {\n"
368          << taskSharedDataBody << taskPayloadBody << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
369          << "}\n";
370     programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
371 
372     std::ostringstream mesh;
373     mesh << "#version 450\n"
374          << "#extension GL_EXT_mesh_shader : enable\n"
375          << "\n"
376          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
377          << "layout (triangles) out;\n"
378          << "layout (max_vertices=3, max_primitives=1) out;\n"
379          << scDecl << dsDecl << taskData << "\n"
380          << "void main () {\n"
381          << meshPayloadBody << "    SetMeshOutputsEXT(0u, 0u);\n"
382          << "}\n";
383     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
384 }
385 
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device)386 Move<VkRenderPass> PayloadShMemSizeInstance::makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device)
387 {
388     const auto subpassDesc =
389         makeSubpassDescription(0u, VK_PIPELINE_BIND_POINT_GRAPHICS, 0u, nullptr, 0u, nullptr, 0u, nullptr, 0u, nullptr);
390     const auto dependency =
391         makeSubpassDependency(0u, 0u, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT,
392                               VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, 0u);
393 
394     const VkRenderPassCreateInfo renderPassCreateInfo = {
395         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
396         nullptr,                                   // const void* pNext;
397         0u,                                        // VkRenderPassCreateFlags flags;
398         0u,                                        // uint32_t attachmentCount;
399         nullptr,                                   // const VkAttachmentDescription* pAttachments;
400         1u,                                        // uint32_t subpassCount;
401         &subpassDesc,                              // const VkSubpassDescription* pSubpasses;
402         1u,                                        // uint32_t dependencyCount;
403         &dependency,                               // const VkSubpassDependency* pDependencies;
404     };
405 
406     return createRenderPass(vkd, device, &renderPassCreateInfo);
407 }
408 
iterate(void)409 tcu::TestStatus PayloadShMemSizeInstance::iterate(void)
410 {
411     const auto &vkd              = m_context.getDeviceInterface();
412     const auto device            = m_context.getDevice();
413     auto &alloc                  = m_context.getDefaultAllocator();
414     const auto queueIndex        = m_context.getUniversalQueueFamilyIndex();
415     const auto queue             = m_context.getUniversalQueue();
416     const auto framebufferExtent = makeExtent2D(1u, 1u);
417     const auto pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
418 
419     const auto resultsBufferSize       = static_cast<VkDeviceSize>(sizeof(uint32_t) * 2u);
420     const auto resultsBufferDescType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
421     const auto resultsBufferUsage      = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
422     const auto resultsBufferStages     = (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT);
423     const auto resultsBufferCreateInfo = makeBufferCreateInfo(resultsBufferSize, resultsBufferUsage);
424     BufferWithMemory resultsBuffer(vkd, device, alloc, resultsBufferCreateInfo, MemoryRequirement::HostVisible);
425     auto &resultsBufferAlloc   = resultsBuffer.getAllocation();
426     void *resultsBufferDataPtr = resultsBufferAlloc.getHostPtr();
427 
428     deMemset(resultsBufferDataPtr, 0, static_cast<size_t>(resultsBufferSize));
429 
430     DescriptorSetLayoutBuilder layoutBuilder;
431     layoutBuilder.addSingleBinding(resultsBufferDescType, resultsBufferStages);
432     const auto setLayout      = layoutBuilder.build(vkd, device);
433     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
434 
435     DescriptorPoolBuilder poolBuilder;
436     poolBuilder.addType(resultsBufferDescType);
437     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
438     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
439 
440     DescriptorSetUpdateBuilder updateBuilder;
441     const auto resultsBufferDescInfo = makeDescriptorBufferInfo(resultsBuffer.get(), 0ull, resultsBufferSize);
442     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
443                               resultsBufferDescType, &resultsBufferDescInfo);
444     updateBuilder.update(vkd, device);
445 
446     const auto &binaries  = m_context.getBinaryCollection();
447     const auto hasTask    = binaries.contains("task");
448     const auto taskShader = (hasTask ? createShaderModule(vkd, device, binaries.get("task")) : Move<VkShaderModule>());
449     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
450 
451     const auto renderPass = makeCustomRenderPass(vkd, device);
452     const auto framebuffer =
453         makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, framebufferExtent.width, framebufferExtent.height);
454 
455     const std::vector<VkViewport> viewports(1u, makeViewport(framebufferExtent));
456     const std::vector<VkRect2D> scissors(1u, makeRect2D(framebufferExtent));
457 
458     const auto specMap                  = makeSpecializationMap();
459     const VkSpecializationInfo specInfo = {
460         static_cast<uint32_t>(specMap.size()), // uint32_t mapEntryCount;
461         de::dataOrNull(specMap),               // const VkSpecializationMapEntry* pMapEntries;
462         de::dataSize(m_specConstants),         // size_t dataSize;
463         de::dataOrNull(m_specConstants),       // const void* pData;
464     };
465 
466     std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
467     VkPipelineShaderStageCreateInfo stageInfo = {
468         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
469         nullptr,                                             // const void* pNext;
470         0u,                                                  // VkPipelineShaderStageCreateFlags flags;
471         VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,                  // VkShaderStageFlagBits stage;
472         DE_NULL,                                             // VkShaderModule module;
473         "main",                                              // const char* pName;
474         &specInfo,                                           // const VkSpecializationInfo* pSpecializationInfo;
475     };
476 
477     if (hasTask)
478     {
479         stageInfo.stage  = VK_SHADER_STAGE_TASK_BIT_EXT;
480         stageInfo.module = taskShader.get();
481         shaderStages.push_back(stageInfo);
482     }
483 
484     {
485         stageInfo.stage  = VK_SHADER_STAGE_MESH_BIT_EXT;
486         stageInfo.module = meshShader.get();
487         shaderStages.push_back(stageInfo);
488     }
489 
490     const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
491                                                renderPass.get(), viewports, scissors);
492 
493     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
494     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
495     const auto cmdBuffer    = cmdBufferPtr.get();
496 
497     beginCommandBuffer(vkd, cmdBuffer);
498     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
499     vkd.cmdBindPipeline(cmdBuffer, pipelineBindPoint, pipeline.get());
500     vkd.cmdBindDescriptorSets(cmdBuffer, pipelineBindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u,
501                               nullptr);
502     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
503     endRenderPass(vkd, cmdBuffer);
504     {
505         const auto writeToHost = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
506         const auto writeStages = (VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT);
507         cmdPipelineMemoryBarrier(vkd, cmdBuffer, writeStages, VK_PIPELINE_STAGE_HOST_BIT, &writeToHost);
508     }
509     endCommandBuffer(vkd, cmdBuffer);
510     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
511 
512     invalidateAlloc(vkd, device, resultsBufferAlloc);
513     struct
514     {
515         uint32_t sharedOK;
516         uint32_t payloadOK;
517     } resultData;
518     deMemcpy(&resultData, resultsBufferDataPtr, sizeof(resultData));
519 
520     if (resultData.sharedOK != 1u)
521         TCU_FAIL("Unexpected shared memory result: " + std::to_string(resultData.sharedOK));
522 
523     if (resultData.payloadOK != 1u)
524         TCU_FAIL("Unexpected payload result: " + std::to_string(resultData.payloadOK));
525 
526     return tcu::TestStatus::pass("Pass");
527 }
528 
529 class MaxViewIndexCase : public vkt::TestCase
530 {
531 public:
MaxViewIndexCase(tcu::TestContext & testCtx,const std::string & name)532     MaxViewIndexCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
533     {
534     }
~MaxViewIndexCase(void)535     virtual ~MaxViewIndexCase(void)
536     {
537     }
538 
539     void checkSupport(Context &context) const override;
540     void initPrograms(vk::SourceCollections &programCollection) const override;
541     TestInstance *createInstance(Context &context) const override;
542 };
543 
544 class MaxViewIndexInstance : public vkt::TestInstance
545 {
546 public:
MaxViewIndexInstance(Context & context)547     MaxViewIndexInstance(Context &context) : vkt::TestInstance(context)
548     {
549     }
~MaxViewIndexInstance(void)550     virtual ~MaxViewIndexInstance(void)
551     {
552     }
553 
554     tcu::TestStatus iterate(void) override;
555     Move<VkRenderPass> makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device, uint32_t layerCount,
556                                             VkFormat format);
557 
558     static constexpr uint32_t kMaxViews = 32u;
559 };
560 
checkSupport(Context & context) const561 void MaxViewIndexCase::checkSupport(Context &context) const
562 {
563     checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
564 
565     const auto &multiviewFeatures = context.getMultiviewFeatures();
566     if (!multiviewFeatures.multiview)
567         TCU_THROW(NotSupportedError, "Multiview not supported");
568 
569     const auto &meshFeatures = context.getMeshShaderFeaturesEXT();
570     if (!meshFeatures.multiviewMeshShader)
571         TCU_THROW(NotSupportedError, "Multiview not supported for mesh shaders");
572 }
573 
initPrograms(vk::SourceCollections & programCollection) const574 void MaxViewIndexCase::initPrograms(vk::SourceCollections &programCollection) const
575 {
576     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
577 
578     std::ostringstream mesh;
579     mesh << "#version 450\n"
580          << "#extension GL_EXT_mesh_shader : enable\n"
581          << "\n"
582          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
583          << "layout (triangles) out;\n"
584          << "layout (max_vertices=3, max_primitives=1) out;\n"
585          << "\n"
586          << "void main (void) {\n"
587          << "    SetMeshOutputsEXT(3u, 1u);\n"
588          << "\n"
589          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
590          << "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
591          << "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
592          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
593          << "}\n";
594     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
595 
596     std::ostringstream frag;
597     frag << "#version 450\n"
598          << "#extension GL_EXT_mesh_shader : enable\n"
599          << "#extension GL_EXT_multiview : enable\n"
600          << "\n"
601          << "layout (location=0) out uvec4 outColor;\n"
602          << "\n"
603          << "void main (void) {\n"
604          << "    outColor = uvec4(uint(gl_ViewIndex) + 1u, 0, 0, 0);\n"
605          << "}\n";
606     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
607 }
608 
createInstance(Context & context) const609 TestInstance *MaxViewIndexCase::createInstance(Context &context) const
610 {
611     return new MaxViewIndexInstance(context);
612 }
613 
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device,uint32_t layerCount,VkFormat format)614 Move<VkRenderPass> MaxViewIndexInstance::makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device,
615                                                               uint32_t layerCount, VkFormat format)
616 {
617     DE_ASSERT(layerCount > 0u);
618 
619     const VkAttachmentDescription colorAttachmentDescription = {
620         0u,                                       // VkAttachmentDescriptionFlags    flags
621         format,                                   // VkFormat                        format
622         VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits           samples
623         VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp              loadOp
624         VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp             storeOp
625         VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp              stencilLoadOp
626         VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp             stencilStoreOp
627         VK_IMAGE_LAYOUT_UNDEFINED,                // VkImageLayout                   initialLayout
628         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout                   finalLayout
629     };
630 
631     const VkAttachmentReference colorAttachmentRef =
632         makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
633 
634     const VkSubpassDescription subpassDescription = {
635         0u,                              // VkSubpassDescriptionFlags       flags
636         VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint             pipelineBindPoint
637         0u,                              // uint32_t                        inputAttachmentCount
638         nullptr,                         // const VkAttachmentReference*    pInputAttachments
639         1u,                              // uint32_t                        colorAttachmentCount
640         &colorAttachmentRef,             // const VkAttachmentReference*    pColorAttachments
641         nullptr,                         // const VkAttachmentReference*    pResolveAttachments
642         nullptr,                         // const VkAttachmentReference*    pDepthStencilAttachment
643         0u,                              // uint32_t                        preserveAttachmentCount
644         nullptr                          // const uint32_t*                 pPreserveAttachments
645     };
646 
647     const uint32_t viewMask                                   = ((1u << layerCount) - 1u);
648     const VkRenderPassMultiviewCreateInfo multiviewCreateInfo = {
649         VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, // VkStructureType sType;
650         nullptr,                                             // const void* pNext;
651         1u,                                                  // uint32_t subpassCount;
652         &viewMask,                                           // const uint32_t* pViewMasks;
653         0u,                                                  // uint32_t dependencyCount;
654         nullptr,                                             // const int32_t* pViewOffsets;
655         1u,                                                  // uint32_t correlationMaskCount;
656         &viewMask,                                           // const uint32_t* pCorrelationMasks;
657     };
658 
659     const VkRenderPassCreateInfo renderPassInfo = {
660         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType                   sType
661         &multiviewCreateInfo,                      // const void*                       pNext
662         0u,                                        // VkRenderPassCreateFlags           flags
663         1u,                                        // uint32_t                          attachmentCount
664         &colorAttachmentDescription,               // const VkAttachmentDescription*    pAttachments
665         1u,                                        // uint32_t                          subpassCount
666         &subpassDescription,                       // const VkSubpassDescription*       pSubpasses
667         0u,                                        // uint32_t                          dependencyCount
668         nullptr,                                   // const VkSubpassDependency*        pDependencies
669     };
670 
671     return createRenderPass(vkd, device, &renderPassInfo);
672 }
673 
iterate(void)674 tcu::TestStatus MaxViewIndexInstance::iterate(void)
675 {
676     const auto &vkd            = m_context.getDeviceInterface();
677     const auto device          = m_context.getDevice();
678     auto &alloc                = m_context.getDefaultAllocator();
679     const auto queueIndex      = m_context.getUniversalQueueFamilyIndex();
680     const auto queue           = m_context.getUniversalQueue();
681     const auto &meshProperties = m_context.getMeshShaderPropertiesEXT();
682     const auto maxViews        = kMaxViews;
683     const auto numViews        = std::min(meshProperties.maxMeshMultiviewViewCount, maxViews);
684     const auto viewType        = ((numViews > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
685     const auto colorFormat     = VK_FORMAT_R32_UINT;
686     const auto tcuColorFormat  = mapVkFormat(colorFormat);
687     const auto pixelSize       = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
688     const auto colorUsage      = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
689     const auto fbExtent        = makeExtent3D(8u, 8u, 1u);
690     const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
691                                static_cast<int>(numViews));
692     const tcu::UVec4 clearColor(0u, 0u, 0u, 0u);
693 
694     // Create color attachment.
695     const VkImageCreateInfo colorAttachmentCreatInfo = {
696         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
697         nullptr,                             // const void* pNext;
698         0u,                                  // VkImageCreateFlags flags;
699         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
700         colorFormat,                         // VkFormat format;
701         fbExtent,                            // VkExtent3D extent;
702         1u,                                  // uint32_t mipLevels;
703         numViews,                            // uint32_t arrayLayers;
704         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
705         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
706         colorUsage,                          // VkImageUsageFlags usage;
707         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
708         0u,                                  // uint32_t queueFamilyIndexCount;
709         nullptr,                             // const uint32_t* pQueueFamilyIndices;
710         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
711     };
712     ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
713     const auto colorSRR            = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numViews);
714     const auto colorSRL            = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, numViews);
715     const auto colorAttachmentView = makeImageView(vkd, device, colorAttachment.get(), viewType, colorFormat, colorSRR);
716 
717     // Verification buffer for the color attachment.
718     DE_ASSERT(fbExtent.depth == 1u);
719     const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
720     const auto verificationBufferSize =
721         static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * numViews);
722     const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
723     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
724                                         MemoryRequirement::HostVisible);
725     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
726     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
727 
728     deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
729 
730     const auto pipelineLayout = makePipelineLayout(vkd, device);
731     const auto renderPass     = makeCustomRenderPass(vkd, device, numViews, colorFormat);
732     const auto framebuffer =
733         makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
734 
735     const auto &binaries  = m_context.getBinaryCollection();
736     const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
737     const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
738 
739     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
740     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
741 
742     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), DE_NULL, meshModule.get(),
743                                                fragModule.get(), renderPass.get(), viewports, scissors);
744 
745     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
746     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
747     const auto cmdBuffer    = cmdBufferPtr.get();
748 
749     beginCommandBuffer(vkd, cmdBuffer);
750     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
751     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
752     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
753     endRenderPass(vkd, cmdBuffer);
754 
755     const auto preTransferBarrier = makeImageMemoryBarrier(
756         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
757         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
758     cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
759                                   VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
760 
761     const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
762     vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
763                              verificationBuffer.get(), 1u, &copyRegion);
764 
765     const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
766     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
767                              &postTransferBarrier);
768 
769     endCommandBuffer(vkd, cmdBuffer);
770     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
771 
772     invalidateAlloc(vkd, device, verificationBufferAlloc);
773     tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
774 
775     for (int z = 0; z < iExtent3D.z(); ++z)
776     {
777         const tcu::UVec4 expectedPixel(static_cast<uint32_t>(z) + 1u, 0u, 0u, 1u);
778         for (int y = 0; y < iExtent3D.y(); ++y)
779             for (int x = 0; x < iExtent3D.x(); ++x)
780             {
781                 const auto resultPixel = resultAccess.getPixelUint(x, y, z);
782                 if (resultPixel != expectedPixel)
783                 {
784                     std::ostringstream msg;
785                     msg << "Unexpected pixel value at layer " << z << ": (" << x << ", " << y << ") is " << resultPixel
786                         << " while expecting " << expectedPixel;
787                     TCU_FAIL(msg.str());
788                 }
789             }
790     }
791 
792     // QualityWarning if needed.
793     if (meshProperties.maxMeshMultiviewViewCount > maxViews)
794     {
795         const auto maxViewsStr = std::to_string(maxViews);
796         return tcu::TestStatus(QP_TEST_RESULT_QUALITY_WARNING,
797                                "Test passed but maxMeshMultiviewViewCount greater than " + maxViewsStr);
798     }
799 
800     return tcu::TestStatus::pass("Pass");
801 }
802 
803 class MaxOutputLayersCase : public vkt::TestCase
804 {
805 public:
MaxOutputLayersCase(tcu::TestContext & testCtx,const std::string & name)806     MaxOutputLayersCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
807     {
808     }
~MaxOutputLayersCase(void)809     virtual ~MaxOutputLayersCase(void)
810     {
811     }
812 
813     TestInstance *createInstance(Context &context) const override;
814     void checkSupport(Context &context) const override;
815     void initPrograms(vk::SourceCollections &programCollection) const override;
816 };
817 
818 class MaxOutputLayersInstance : public vkt::TestInstance
819 {
820 public:
MaxOutputLayersInstance(Context & context)821     MaxOutputLayersInstance(Context &context) : vkt::TestInstance(context)
822     {
823     }
~MaxOutputLayersInstance(void)824     virtual ~MaxOutputLayersInstance(void)
825     {
826     }
827 
828     tcu::TestStatus iterate(void) override;
829 };
830 
createInstance(Context & context) const831 TestInstance *MaxOutputLayersCase::createInstance(Context &context) const
832 {
833     return new MaxOutputLayersInstance(context);
834 }
835 
checkSupport(Context & context) const836 void MaxOutputLayersCase::checkSupport(Context &context) const
837 {
838     checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
839 }
840 
initPrograms(vk::SourceCollections & programCollection) const841 void MaxOutputLayersCase::initPrograms(vk::SourceCollections &programCollection) const
842 {
843     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
844 
845     std::ostringstream mesh;
846     mesh << "#version 450\n"
847          << "#extension GL_EXT_mesh_shader : enable\n"
848          << "\n"
849          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
850          << "layout (triangles) out;\n"
851          << "layout (max_vertices=3, max_primitives=1) out;\n"
852          << "\n"
853          << "void main (void) {\n"
854          << "    SetMeshOutputsEXT(3u, 1u);\n"
855          << "\n"
856          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
857          << "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
858          << "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
859          << "\n"
860          << "    gl_MeshPrimitivesEXT[0].gl_Layer = int(gl_WorkGroupID.x);\n"
861          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
862          << "}\n";
863     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
864 
865     std::ostringstream frag;
866     frag << "#version 450\n"
867          << "\n"
868          << "layout (location=0) out uvec4 outColor;\n"
869          << "\n"
870          << "void main (void) {\n"
871          << "    outColor = uvec4(uint(gl_Layer) + 1u, 0, 0, 0);\n"
872          << "}\n";
873     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
874 }
875 
iterate(void)876 tcu::TestStatus MaxOutputLayersInstance::iterate(void)
877 {
878     const auto &vki            = m_context.getInstanceInterface();
879     const auto &physicalDevice = m_context.getPhysicalDevice();
880     const auto &vkd            = m_context.getDeviceInterface();
881     const auto device          = m_context.getDevice();
882     auto &alloc                = m_context.getDefaultAllocator();
883     const auto queueIndex      = m_context.getUniversalQueueFamilyIndex();
884     const auto queue           = m_context.getUniversalQueue();
885     const auto fbFormat        = VK_FORMAT_R32_UINT;
886     const auto imageType       = VK_IMAGE_TYPE_2D;
887     const auto tiling          = VK_IMAGE_TILING_OPTIMAL;
888     const auto usage           = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
889     const auto sampleCount     = VK_SAMPLE_COUNT_1_BIT;
890     auto &log                  = m_context.getTestContext().getLog();
891 
892     // Find out how many layers we can actually use.
893     const auto &properties     = m_context.getDeviceProperties();
894     const auto &meshProperties = m_context.getMeshShaderPropertiesEXT();
895     const auto formatProperties =
896         getPhysicalDeviceImageFormatProperties(vki, physicalDevice, fbFormat, imageType, tiling, usage, 0u);
897     const auto layerCount = std::min({
898         properties.limits.maxFramebufferLayers,
899         meshProperties.maxMeshOutputLayers,
900         formatProperties.maxArrayLayers,
901         meshProperties.maxMeshWorkGroupCount[0],
902     });
903 
904     // This is needed for iExtent3D below.
905     DE_ASSERT(static_cast<uint64_t>(std::numeric_limits<int>::max()) >= static_cast<uint64_t>(layerCount));
906     log << tcu::TestLog::Message << "Using " + std::to_string(layerCount) + " layers" << tcu::TestLog::EndMessage;
907 
908     const auto viewType       = ((layerCount > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
909     const auto tcuColorFormat = mapVkFormat(fbFormat);
910     const auto pixelSize      = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
911     const auto fbExtent       = makeExtent3D(1u, 1u, 1u);
912     const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
913                                static_cast<int>(layerCount));
914     const tcu::UVec4 clearColor(0u, 0u, 0u, 0u);
915 
916     // Create color attachment.
917     const VkImageCreateInfo colorAttachmentCreatInfo = {
918         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
919         nullptr,                             // const void* pNext;
920         0u,                                  // VkImageCreateFlags flags;
921         imageType,                           // VkImageType imageType;
922         fbFormat,                            // VkFormat format;
923         fbExtent,                            // VkExtent3D extent;
924         1u,                                  // uint32_t mipLevels;
925         layerCount,                          // uint32_t arrayLayers;
926         sampleCount,                         // VkSampleCountFlagBits samples;
927         tiling,                              // VkImageTiling tiling;
928         usage,                               // VkImageUsageFlags usage;
929         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
930         0u,                                  // uint32_t queueFamilyIndexCount;
931         nullptr,                             // const uint32_t* pQueueFamilyIndices;
932         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
933     };
934     ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
935     const auto colorSRR            = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, layerCount);
936     const auto colorSRL            = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, layerCount);
937     const auto colorAttachmentView = makeImageView(vkd, device, colorAttachment.get(), viewType, fbFormat, colorSRR);
938 
939     // Verification buffer for the color attachment.
940     DE_ASSERT(fbExtent.depth == 1u);
941     const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
942     const auto verificationBufferSize =
943         static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * layerCount);
944     const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
945     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
946                                         MemoryRequirement::HostVisible);
947     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
948     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
949 
950     deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
951 
952     const auto pipelineLayout = makePipelineLayout(vkd, device);
953     const auto renderPass     = makeRenderPass(vkd, device, fbFormat);
954     const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width,
955                                              fbExtent.height, layerCount);
956 
957     const auto &binaries  = m_context.getBinaryCollection();
958     const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
959     const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
960 
961     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
962     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
963 
964     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), DE_NULL, meshModule.get(),
965                                                fragModule.get(), renderPass.get(), viewports, scissors);
966 
967     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
968     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
969     const auto cmdBuffer    = cmdBufferPtr.get();
970 
971     beginCommandBuffer(vkd, cmdBuffer);
972     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
973     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
974     vkd.cmdDrawMeshTasksEXT(cmdBuffer, layerCount, 1u, 1u);
975     endRenderPass(vkd, cmdBuffer);
976 
977     const auto preTransferBarrier = makeImageMemoryBarrier(
978         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
979         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
980     cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
981                                   VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
982 
983     const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
984     vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
985                              verificationBuffer.get(), 1u, &copyRegion);
986 
987     const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
988     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
989                              &postTransferBarrier);
990 
991     endCommandBuffer(vkd, cmdBuffer);
992     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
993 
994     invalidateAlloc(vkd, device, verificationBufferAlloc);
995     tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
996 
997     for (int z = 0; z < iExtent3D.z(); ++z)
998     {
999         const tcu::UVec4 expectedPixel(static_cast<uint32_t>(z) + 1u, 0u, 0u, 1u);
1000         for (int y = 0; y < iExtent3D.y(); ++y)
1001             for (int x = 0; x < iExtent3D.x(); ++x)
1002             {
1003                 const auto resultPixel = resultAccess.getPixelUint(x, y, z);
1004                 if (resultPixel != expectedPixel)
1005                 {
1006                     std::ostringstream msg;
1007                     msg << "Unexpected pixel value at layer " << z << ": (" << x << ", " << y << ") is " << resultPixel
1008                         << " while expecting " << expectedPixel;
1009                     TCU_FAIL(msg.str());
1010                 }
1011             }
1012     }
1013 
1014     return tcu::TestStatus::pass("Pass");
1015 }
1016 
// Selects which mesh output limit a MaxMeshOutputPrimVert test exercises.
enum class MaxPrimVertType
{
    PRIMITIVES = 0, // Emit many primitives that share a single vertex.
    VERTICES   = 1, // Emit many vertices, one point primitive per vertex.
};
1022 
// Parameters of a MaxMeshOutputPrimVert test variant.
struct MaxPrimVertParams
{
    // Which output limit is being tested (primitives or vertices).
    MaxPrimVertType testType;
    // Number of items (primitives or vertices, depending on testType) the mesh shader emits.
    uint32_t itemCount;
};
1028 
1029 class MaxMeshOutputPrimVertCase : public vkt::TestCase
1030 {
1031 public:
MaxMeshOutputPrimVertCase(tcu::TestContext & testCtx,const std::string & name,const MaxPrimVertParams & params)1032     MaxMeshOutputPrimVertCase(tcu::TestContext &testCtx, const std::string &name, const MaxPrimVertParams &params)
1033         : vkt::TestCase(testCtx, name)
1034         , m_params(params)
1035     {
1036     }
~MaxMeshOutputPrimVertCase(void)1037     virtual ~MaxMeshOutputPrimVertCase(void)
1038     {
1039     }
1040 
1041     void initPrograms(vk::SourceCollections &programCollection) const override;
1042     TestInstance *createInstance(Context &context) const override;
1043     void checkSupport(Context &context) const override;
1044 
1045 protected:
1046     static constexpr uint32_t kLocalInvocations = 128u;
1047 
1048     const MaxPrimVertParams m_params;
1049 };
1050 
1051 class MaxMeshOutputPrimVertInstance : public vkt::TestInstance
1052 {
1053 public:
MaxMeshOutputPrimVertInstance(Context & context,uint32_t shaderPrimitives,uint32_t fbWidth)1054     MaxMeshOutputPrimVertInstance(Context &context, uint32_t shaderPrimitives, uint32_t fbWidth)
1055         : vkt::TestInstance(context)
1056         , m_shaderPrimitives(shaderPrimitives)
1057         , m_fbWidth(fbWidth)
1058     {
1059         DE_ASSERT(m_shaderPrimitives > 0u);
1060         DE_ASSERT(m_fbWidth > 0u);
1061     }
~MaxMeshOutputPrimVertInstance(void)1062     virtual ~MaxMeshOutputPrimVertInstance(void)
1063     {
1064     }
1065 
1066     tcu::TestStatus iterate(void) override;
1067 
1068 protected:
1069     const uint32_t m_shaderPrimitives;
1070     const uint32_t m_fbWidth;
1071 };
1072 
createInstance(Context & context) const1073 TestInstance *MaxMeshOutputPrimVertCase::createInstance(Context &context) const
1074 {
1075     const auto fbWidth = ((m_params.testType == MaxPrimVertType::PRIMITIVES) ? 1u : m_params.itemCount);
1076     return new MaxMeshOutputPrimVertInstance(context, m_params.itemCount, fbWidth);
1077 }
1078 
checkSupport(Context & context) const1079 void MaxMeshOutputPrimVertCase::checkSupport(Context &context) const
1080 {
1081     checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
1082     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
1083 
1084     // Note when testing vertices, due to our usage of points as the primitive type, we are also limited by the number of primitives.
1085 
1086     const auto isVertices      = (m_params.testType == MaxPrimVertType::VERTICES);
1087     const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1088     const auto &itemLimit      = isVertices ?
1089                                      std::min(meshProperties.maxMeshOutputVertices, meshProperties.maxMeshOutputPrimitives) :
1090                                      meshProperties.maxMeshOutputPrimitives;
1091 
1092     if (m_params.itemCount > itemLimit)
1093         TCU_THROW(NotSupportedError, "Implementation does not support the given amount of items");
1094 
1095     // Check memory limits just in case.
1096     uint32_t totalBytes = 0u;
1097     const auto perVertexBytes =
1098         static_cast<uint32_t>(sizeof(tcu::Vec4) + sizeof(float)); // gl_Position and gl_PointSize
1099 
1100     if (isVertices)
1101     {
1102         // No per-primitive data in this variant.
1103         const auto actualVertices = de::roundUp(m_params.itemCount, meshProperties.meshOutputPerVertexGranularity);
1104 
1105         totalBytes = perVertexBytes * actualVertices;
1106     }
1107     else
1108     {
1109         // Single vertex, but using gl_PrimitiveID in each primitive.
1110         const auto perPrimitiveBytes = static_cast<uint32_t>(sizeof(uint32_t)); // gl_PrimitiveID
1111         const auto actualVertices    = de::roundUp(1u, meshProperties.meshOutputPerVertexGranularity);
1112         const auto actualPrimitives = de::roundUp(m_params.itemCount, meshProperties.meshOutputPerPrimitiveGranularity);
1113 
1114         totalBytes = perVertexBytes * actualVertices + perPrimitiveBytes * actualPrimitives;
1115     }
1116 
1117     if (totalBytes > meshProperties.maxMeshOutputMemorySize)
1118         TCU_THROW(NotSupportedError, "Not enough output memory for this test");
1119 }
1120 
initPrograms(vk::SourceCollections & programCollection) const1121 void MaxMeshOutputPrimVertCase::initPrograms(vk::SourceCollections &programCollection) const
1122 {
1123     const auto buildOptions     = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1124     const bool isPrimitives     = (m_params.testType == MaxPrimVertType::PRIMITIVES);
1125     const auto associatedVertex = (isPrimitives ? "0u" : "primitiveID");
1126     const auto maxVertices      = (isPrimitives ? 1u : m_params.itemCount);
1127     const auto ssboIndex        = (isPrimitives ? "gl_PrimitiveID" : "uint(gl_FragCoord.x)");
1128     const auto xCoord           = (isPrimitives ? "0.0" : "(float(vertexID) + 0.5) / float(maxVertices) * 2.0 - 1.0");
1129     const auto maxPrimitives    = m_params.itemCount;
1130 
1131     // When testing vertices, we'll use a wide framebuffer, emit one vertex per pixel and use the fragment coords to index into the
1132     // SSBO. When testing primitives, we'll use a 1x1 framebuffer, emit one single vertex in the center and use the primitive id to
1133     // index into the SSBO.
1134     std::ostringstream frag;
1135     frag << "#version 450\n"
1136          << "\n"
1137          << "layout (set=0, binding=0, std430) buffer OutputBlock {\n"
1138          << "    uint flags[];\n"
1139          << "} ssbo;\n"
1140          << "\n"
1141          << "void main (void) {\n"
1142          << "    ssbo.flags[" << ssboIndex << "] = 1u;\n"
1143          << "}\n";
1144     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1145 
1146     std::ostringstream mesh;
1147     mesh << "#version 450\n"
1148          << "#extension GL_EXT_mesh_shader : enable\n"
1149          << "\n"
1150          << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1151          << "layout (points) out;\n"
1152          << "layout (max_vertices=" << maxVertices << ", max_primitives=" << maxPrimitives << ") out;\n"
1153          << "\n"
1154          << "out gl_MeshPerVertexEXT {\n"
1155          << "    vec4  gl_Position;\n"
1156          << "    float gl_PointSize;\n"
1157          << "} gl_MeshVerticesEXT[];\n"
1158          << "\n";
1159 
1160     if (isPrimitives)
1161     {
1162         mesh << "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
1163              << "    int gl_PrimitiveID;\n"
1164              << "} gl_MeshPrimitivesEXT[];\n"
1165              << "\n";
1166     }
1167 
1168     mesh << "void main (void) {\n"
1169          << "    const uint localInvs = " << kLocalInvocations << "u;\n"
1170          << "    const uint maxVertices = " << maxVertices << "u;\n"
1171          << "    const uint maxPoints = " << maxPrimitives << "u;\n"
1172          << "    const uint verticesPerInvocation = (maxVertices + localInvs - 1u) / localInvs;\n"
1173          << "    const uint primitivesPerInvocation = (maxPoints + localInvs - 1u) / localInvs;\n"
1174          << "\n"
1175          << "    SetMeshOutputsEXT(maxVertices, maxPoints);\n"
1176          << "\n"
1177          << "    for (uint i = 0u; i < verticesPerInvocation; ++i) {\n"
1178          << "        const uint vertexID = gl_LocalInvocationIndex * verticesPerInvocation + i;\n"
1179          << "        if (vertexID >= maxVertices) {\n"
1180          << "            break;\n"
1181          << "        }\n"
1182          << "        const float xCoord = " << xCoord << ";\n"
1183          << "        gl_MeshVerticesEXT[vertexID].gl_Position = vec4(xCoord, 0.0, 0.0, 1.0);\n"
1184          << "        gl_MeshVerticesEXT[vertexID].gl_PointSize = 1.0f;\n"
1185          << "    }\n"
1186          << "\n"
1187          << "    for (uint i = 0u; i < primitivesPerInvocation; ++i) {\n"
1188          << "        const uint primitiveID = gl_LocalInvocationIndex * primitivesPerInvocation + i;\n"
1189          << "        if (primitiveID >= maxPoints) {\n"
1190          << "            break;\n"
1191          << "        }\n"
1192          << (isPrimitives ? "        gl_MeshPrimitivesEXT[primitiveID].gl_PrimitiveID = int(primitiveID);\n" : "")
1193          << "        gl_PrimitivePointIndicesEXT[primitiveID] = " << associatedVertex << ";\n"
1194          << "    }\n"
1195          << "}\n";
1196     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1197 }
1198 
iterate(void)1199 tcu::TestStatus MaxMeshOutputPrimVertInstance::iterate(void)
1200 {
1201     const auto &vkd       = m_context.getDeviceInterface();
1202     const auto device     = m_context.getDevice();
1203     auto &alloc           = m_context.getDefaultAllocator();
1204     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
1205     const auto queue      = m_context.getUniversalQueue();
1206     const auto fbExtent   = makeExtent2D(m_fbWidth, 1u);
1207     const auto bindPoint  = VK_PIPELINE_BIND_POINT_GRAPHICS;
1208 
1209     const auto ssboSize     = static_cast<VkDeviceSize>(sizeof(uint32_t) * m_shaderPrimitives);
1210     const auto ssboUsage    = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
1211     const auto ssboDescType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1212 
1213     const auto ssboCreateInfo = makeBufferCreateInfo(ssboSize, ssboUsage);
1214     BufferWithMemory ssbo(vkd, device, alloc, ssboCreateInfo, MemoryRequirement::HostVisible);
1215     auto &ssboAlloc         = ssbo.getAllocation();
1216     void *ssboData          = ssboAlloc.getHostPtr();
1217     const auto ssboDescInfo = makeDescriptorBufferInfo(ssbo.get(), 0ull, ssboSize);
1218 
1219     // Zero-out SSBO.
1220     deMemset(ssboData, 0, static_cast<size_t>(ssboSize));
1221     flushAlloc(vkd, device, ssboAlloc);
1222 
1223     // Descriptor set layout, pool, set and set update.
1224     DescriptorSetLayoutBuilder setLayoutBuilder;
1225     setLayoutBuilder.addSingleBinding(ssboDescType, VK_SHADER_STAGE_FRAGMENT_BIT);
1226     const auto setLayout = setLayoutBuilder.build(vkd, device);
1227 
1228     DescriptorPoolBuilder poolBuilder;
1229     poolBuilder.addType(ssboDescType);
1230     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1231     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1232 
1233     DescriptorSetUpdateBuilder updateBuilder;
1234     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), ssboDescType,
1235                               &ssboDescInfo);
1236     updateBuilder.update(vkd, device);
1237 
1238     // Pipeline layout, render pass and pipeline.
1239     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
1240     const auto renderPass     = makeRenderPass(vkd, device);
1241     const auto framebuffer =
1242         makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
1243 
1244     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
1245     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
1246 
1247     const auto &binaries  = m_context.getBinaryCollection();
1248     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
1249     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
1250     const auto pipeline   = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), DE_NULL, meshShader.get(),
1251                                                  fragShader.get(), renderPass.get(), viewports, scissors);
1252 
1253     // Command pool and buffer.
1254     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
1255     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1256     const auto cmdBuffer    = cmdBufferPtr.get();
1257 
1258     beginCommandBuffer(vkd, cmdBuffer);
1259     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
1260     vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
1261     vkd.cmdBindPipeline(cmdBuffer, bindPoint, pipeline.get());
1262     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1263     endRenderPass(vkd, cmdBuffer);
1264     endCommandBuffer(vkd, cmdBuffer);
1265     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1266 
1267     invalidateAlloc(vkd, device, ssboAlloc);
1268     std::vector<uint32_t> outputFlags(m_shaderPrimitives, 0u);
1269     deMemcpy(outputFlags.data(), ssboData, de::dataSize(outputFlags));
1270 
1271     // Verify output SSBO.
1272     bool pass = true;
1273     auto &log = m_context.getTestContext().getLog();
1274 
1275     for (size_t i = 0u; i < outputFlags.size(); ++i)
1276     {
1277         if (outputFlags[i] != 1u)
1278         {
1279             std::ostringstream msg;
1280             msg << "Primitive ID " << i << " flag != 1: " << outputFlags[i];
1281             log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
1282             pass = false;
1283         }
1284     }
1285 
1286     if (!pass)
1287         TCU_FAIL("Check log for details");
1288 
1289     return tcu::TestStatus::pass("Pass");
1290 }
1291 
1292 class MaxMeshOutputComponentsCase : public vkt::TestCase
1293 {
1294 public:
MaxMeshOutputComponentsCase(tcu::TestContext & testCtx,const std::string & name)1295     MaxMeshOutputComponentsCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
1296     {
1297     }
1298 
~MaxMeshOutputComponentsCase(void)1299     virtual ~MaxMeshOutputComponentsCase(void)
1300     {
1301     }
1302 
1303     void initPrograms(vk::SourceCollections &programCollection) const override;
1304     TestInstance *createInstance(Context &context) const override;
1305     void checkSupport(Context &context) const override;
1306 
1307 protected:
1308     struct ParamsFromContext
1309     {
1310         uint32_t maxLocations;
1311     };
1312     ParamsFromContext getParamsFromContext(Context &context) const;
1313 };
1314 
1315 class MaxMeshOutputComponentsInstance : public SpecConstantInstance
1316 {
1317 public:
MaxMeshOutputComponentsInstance(Context & context,SpecConstVector && scVector)1318     MaxMeshOutputComponentsInstance(Context &context, SpecConstVector &&scVector)
1319         : SpecConstantInstance(context, std::move(scVector))
1320     {
1321     }
1322 
~MaxMeshOutputComponentsInstance(void)1323     virtual ~MaxMeshOutputComponentsInstance(void)
1324     {
1325     }
1326 
1327     tcu::TestStatus iterate(void) override;
1328 };
1329 
getParamsFromContext(Context & context) const1330 MaxMeshOutputComponentsCase::ParamsFromContext MaxMeshOutputComponentsCase::getParamsFromContext(Context &context) const
1331 {
1332     const uint32_t kLocationComponents =
1333         4u; // Each location can handle up to 4 32-bit components (and we'll be using uvec4).
1334     const uint32_t kUsedLocations = 1u; // For gl_Position.
1335     const uint32_t maxLocations =
1336         context.getMeshShaderPropertiesEXT().maxMeshOutputComponents / kLocationComponents - kUsedLocations;
1337 
1338     ParamsFromContext params{maxLocations};
1339     return params;
1340 }
1341 
checkSupport(Context & context) const1342 void MaxMeshOutputComponentsCase::checkSupport(Context &context) const
1343 {
1344     checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
1345 }
1346 
createInstance(Context & context) const1347 TestInstance *MaxMeshOutputComponentsCase::createInstance(Context &context) const
1348 {
1349     const auto ctxParams = getParamsFromContext(context);
1350     SpecConstVector specConstVec{ctxParams.maxLocations};
1351 
1352     return new MaxMeshOutputComponentsInstance(context, std::move(specConstVec));
1353 }
1354 
initPrograms(vk::SourceCollections & programCollection) const1355 void MaxMeshOutputComponentsCase::initPrograms(vk::SourceCollections &programCollection) const
1356 {
1357     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1358 
1359     const std::string locationStructDecl = "layout (constant_id=0) const uint maxLocations = 1u;\n"
1360                                            "struct LocationStruct {\n"
1361                                            "    uvec4 location_var[maxLocations];\n"
1362                                            "};\n";
1363 
1364     const std::string declOut =
1365         locationStructDecl + "layout (location=0) perprimitiveEXT flat out LocationStruct ls[];\n";
1366 
1367     const std::string declIn = locationStructDecl + "layout (location=0) perprimitiveEXT flat in LocationStruct ls;\n";
1368 
1369     std::ostringstream mesh;
1370     mesh << "#version 450\n"
1371          << "#extension GL_EXT_mesh_shader : enable\n"
1372          << "\n"
1373          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
1374          << "layout (triangles) out;\n"
1375          << "layout (max_vertices=3, max_primitives=1) out;\n"
1376          << "\n"
1377          << "out gl_MeshPerVertexEXT {\n"
1378          << "    vec4  gl_Position;\n"
1379          << "} gl_MeshVerticesEXT[];\n"
1380          << "\n"
1381          << declOut << "\n"
1382          << "void main (void) {\n"
1383          << "    SetMeshOutputsEXT(3u, 1u);\n"
1384          << "    gl_MeshVerticesEXT[0].gl_Position = vec4( 0.0, -0.5, 0.0, 1.0);\n"
1385          << "    gl_MeshVerticesEXT[1].gl_Position = vec4(-0.5,  0.5, 0.0, 1.0);\n"
1386          << "    gl_MeshVerticesEXT[2].gl_Position = vec4( 0.5,  0.5, 0.0, 1.0);\n"
1387          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
1388          << "\n"
1389          << "    for (uint i = 0u; i < maxLocations; ++i) {\n"
1390          << "        const uint baseVal = 10000u * (i + 1u);\n"
1391          << "        const uvec4 expectedValue = uvec4(baseVal + 1u, baseVal + 2u, baseVal + 3u, baseVal + 4u);\n"
1392          << "        ls[0].location_var[i] = expectedValue;\n"
1393          << "    }\n"
1394          << "}\n";
1395     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1396 
1397     std::ostringstream frag;
1398     frag << "#version 450\n"
1399          << "#extension GL_EXT_mesh_shader : enable\n"
1400          << "\n"
1401          << "layout (location=0) out vec4 outColor;\n"
1402          << "\n"
1403          << declIn << "\n"
1404          << "void main (void) {\n"
1405          << "    bool success = true;\n"
1406          << "    for (uint i = 0u; i < maxLocations; ++i) {\n"
1407          << "        const uint baseVal = 10000u * (i + 1u);\n"
1408          << "        const uvec4 expectedValue = uvec4(baseVal + 1u, baseVal + 2u, baseVal + 3u, baseVal + 4u);\n"
1409          << "        success = success && (ls.location_var[i] == expectedValue);\n"
1410          << "    }\n"
1411          << "    outColor = (success ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
1412          << "}\n";
1413     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
1414 }
1415 
iterate(void)1416 tcu::TestStatus MaxMeshOutputComponentsInstance::iterate(void)
1417 {
1418     const auto &vkd       = m_context.getDeviceInterface();
1419     const auto device     = m_context.getDevice();
1420     auto &alloc           = m_context.getDefaultAllocator();
1421     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
1422     const auto queue      = m_context.getUniversalQueue();
1423 
1424     const auto colorFormat    = VK_FORMAT_R8G8B8A8_UNORM;
1425     const auto tcuColorFormat = mapVkFormat(colorFormat);
1426     const auto pixelSize      = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
1427     const auto colorUsage     = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1428     const auto fbExtent       = makeExtent3D(1u, 1u, 1u);
1429     const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
1430                                static_cast<int>(fbExtent.depth));
1431     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 1.0f);
1432     const tcu::Vec4 expectedColor(0.0f, 0.0f, 1.0f, 1.0f);
1433     const tcu::Vec4 colorThreshold(0.0f, 0.0f, 0.0f, 0.0f);
1434 
1435     // Create color attachment.
1436     const VkImageCreateInfo colorAttachmentCreatInfo = {
1437         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1438         nullptr,                             // const void* pNext;
1439         0u,                                  // VkImageCreateFlags flags;
1440         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
1441         colorFormat,                         // VkFormat format;
1442         fbExtent,                            // VkExtent3D extent;
1443         1u,                                  // uint32_t mipLevels;
1444         1u,                                  // uint32_t arrayLayers;
1445         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
1446         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
1447         colorUsage,                          // VkImageUsageFlags usage;
1448         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1449         0u,                                  // uint32_t queueFamilyIndexCount;
1450         nullptr,                             // const uint32_t* pQueueFamilyIndices;
1451         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
1452     };
1453     ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
1454     const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1455     const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1456     const auto colorAttachmentView =
1457         makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
1458 
1459     // Verification buffer for the color attachment.
1460     DE_ASSERT(fbExtent.depth == 1u);
1461     const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1462     const auto verificationBufferSize =
1463         static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * fbExtent.depth);
1464     const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
1465     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
1466                                         MemoryRequirement::HostVisible);
1467     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
1468     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
1469 
1470     deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
1471 
1472     const auto pipelineLayout = makePipelineLayout(vkd, device);
1473     const auto renderPass     = makeRenderPass(vkd, device, colorFormat);
1474     const auto framebuffer =
1475         makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
1476 
1477     const auto &binaries  = m_context.getBinaryCollection();
1478     const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
1479     const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
1480 
1481     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
1482     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
1483 
1484     const auto specMap                  = makeSpecializationMap();
1485     const VkSpecializationInfo specInfo = {
1486         static_cast<uint32_t>(specMap.size()), // uint32_t mapEntryCount;
1487         de::dataOrNull(specMap),               // const VkSpecializationMapEntry* pMapEntries;
1488         de::dataSize(m_specConstants),         // size_t dataSize;
1489         de::dataOrNull(m_specConstants),       // const void* pData;
1490     };
1491 
1492     std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
1493     VkPipelineShaderStageCreateInfo stageInfo = {
1494         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1495         nullptr,                                             // const void* pNext;
1496         0u,                                                  // VkPipelineShaderStageCreateFlags flags;
1497         VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,                  // VkShaderStageFlagBits stage;
1498         DE_NULL,                                             // VkShaderModule module;
1499         "main",                                              // const char* pName;
1500         &specInfo,                                           // const VkSpecializationInfo* pSpecializationInfo;
1501     };
1502 
1503     {
1504         stageInfo.stage  = VK_SHADER_STAGE_MESH_BIT_EXT;
1505         stageInfo.module = meshModule.get();
1506         shaderStages.push_back(stageInfo);
1507     }
1508 
1509     {
1510         stageInfo.stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
1511         stageInfo.module = fragModule.get();
1512         shaderStages.push_back(stageInfo);
1513     }
1514 
1515     const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
1516                                                renderPass.get(), viewports, scissors);
1517 
1518     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
1519     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1520     const auto cmdBuffer    = cmdBufferPtr.get();
1521 
1522     beginCommandBuffer(vkd, cmdBuffer);
1523     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
1524     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
1525     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1526     endRenderPass(vkd, cmdBuffer);
1527 
1528     const auto preTransferBarrier = makeImageMemoryBarrier(
1529         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1530         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
1531     cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1532                                   VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
1533 
1534     const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
1535     vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1536                              verificationBuffer.get(), 1u, &copyRegion);
1537 
1538     const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1539     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1540                              &postTransferBarrier);
1541 
1542     endCommandBuffer(vkd, cmdBuffer);
1543     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1544 
1545     invalidateAlloc(vkd, device, verificationBufferAlloc);
1546     tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
1547 
1548     auto &log = m_context.getTestContext().getLog();
1549     log << tcu::TestLog::Message << "maxLocations value: " << m_specConstants.at(0u) << tcu::TestLog::EndMessage;
1550     if (!tcu::floatThresholdCompare(log, "Result", "", expectedColor, resultAccess, colorThreshold,
1551                                     tcu::COMPARE_LOG_ON_ERROR))
1552         TCU_FAIL("Check log for details");
1553 
1554     return tcu::TestStatus::pass("Pass");
1555 }
1556 
1557 class MeshPayloadShMemSizeCase : public vkt::TestCase
1558 {
1559 public:
MeshPayloadShMemSizeCase(tcu::TestContext & testCtx,const std::string & name,const MeshPayloadShMemSizeParams & params)1560     MeshPayloadShMemSizeCase(tcu::TestContext &testCtx, const std::string &name,
1561                              const MeshPayloadShMemSizeParams &params)
1562         : vkt::TestCase(testCtx, name)
1563         , m_params(params)
1564     {
1565     }
~MeshPayloadShMemSizeCase(void)1566     virtual ~MeshPayloadShMemSizeCase(void)
1567     {
1568     }
1569 
1570     void checkSupport(Context &context) const override;
1571     void initPrograms(vk::SourceCollections &programCollection) const override;
1572     TestInstance *createInstance(Context &context) const override;
1573 
1574 protected:
1575     struct ParamsFromContext
1576     {
1577         uint32_t payloadElements;
1578         uint32_t sharedMemoryElements;
1579     };
1580     ParamsFromContext getParamsFromContext(Context &context) const;
1581 
1582     const MeshPayloadShMemSizeParams m_params;
1583 
1584     static constexpr uint32_t kElementSize      = static_cast<uint32_t>(sizeof(uint32_t));
1585     static constexpr uint32_t kLocalInvocations = 128u;
1586 };
1587 
checkSupport(Context & context) const1588 void MeshPayloadShMemSizeCase::checkSupport(Context &context) const
1589 {
1590     const bool requireTask = m_params.hasPayload();
1591 
1592     checkTaskMeshShaderSupportEXT(context, requireTask, true /*requireMesh*/);
1593     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1594 
1595     const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1596     const auto minSize         = kLocalInvocations * kElementSize;
1597 
1598     // Note: the min required values for these properties in the spec would pass these checks.
1599 
1600     if (requireTask)
1601     {
1602         if (meshProperties.maxTaskPayloadSize < minSize)
1603             TCU_FAIL("Invalid maxTaskPayloadSize");
1604 
1605         if (meshProperties.maxTaskPayloadAndSharedMemorySize < minSize)
1606             TCU_FAIL("Invalid maxTaskPayloadAndSharedMemorySize");
1607     }
1608 
1609     if (meshProperties.maxMeshSharedMemorySize < minSize)
1610         TCU_FAIL("Invalid maxMeshSharedMemorySize");
1611 
1612     if (meshProperties.maxMeshPayloadAndSharedMemorySize < minSize)
1613         TCU_FAIL("Invalid maxMeshPayloadAndSharedMemorySize");
1614 
1615     if (meshProperties.maxMeshPayloadAndOutputMemorySize < minSize)
1616         TCU_FAIL("Invalid maxMeshPayloadAndOutputMemorySize");
1617 }
1618 
getParamsFromContext(Context & context) const1619 MeshPayloadShMemSizeCase::ParamsFromContext MeshPayloadShMemSizeCase::getParamsFromContext(Context &context) const
1620 {
1621     ParamsFromContext params;
1622 
1623     const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1624     const auto maxTaskPayloadSize =
1625         std::min(meshProperties.maxTaskPayloadAndSharedMemorySize, meshProperties.maxTaskPayloadSize);
1626     const auto maxMeshPayloadSize =
1627         std::min(meshProperties.maxMeshPayloadAndOutputMemorySize, meshProperties.maxMeshPayloadAndSharedMemorySize);
1628     const auto maxPayloadElements = std::min(maxTaskPayloadSize, maxMeshPayloadSize) / kElementSize;
1629     const auto maxShMemElements   = meshProperties.maxMeshSharedMemorySize / kElementSize;
1630     const auto maxTotalElements   = meshProperties.maxTaskPayloadAndSharedMemorySize / kElementSize;
1631 
1632     if (m_params.testType == PayLoadShMemSizeType::PAYLOAD)
1633     {
1634         params.sharedMemoryElements = 0u;
1635         params.payloadElements      = std::min(maxTotalElements, maxPayloadElements);
1636     }
1637     else if (m_params.testType == PayLoadShMemSizeType::SHARED_MEMORY)
1638     {
1639         params.payloadElements      = 0u;
1640         params.sharedMemoryElements = std::min(maxTotalElements, maxShMemElements);
1641     }
1642     else
1643     {
1644         uint32_t *minPtr;
1645         uint32_t minVal;
1646         uint32_t *maxPtr;
1647         uint32_t maxVal;
1648 
1649         // Divide them as evenly as possible getting them as closest as possible to maxTotalElements.
1650         if (maxPayloadElements < maxShMemElements)
1651         {
1652             minPtr = &params.payloadElements;
1653             minVal = maxPayloadElements;
1654 
1655             maxPtr = &params.sharedMemoryElements;
1656             maxVal = maxShMemElements;
1657         }
1658         else
1659         {
1660             minPtr = &params.sharedMemoryElements;
1661             minVal = maxShMemElements;
1662 
1663             maxPtr = &params.payloadElements;
1664             maxVal = maxPayloadElements;
1665         }
1666 
1667         *minPtr = std::min(minVal, maxTotalElements / 2u);
1668         *maxPtr = std::min(maxTotalElements - (*minPtr), maxVal);
1669     }
1670 
1671     return params;
1672 }
1673 
createInstance(Context & context) const1674 TestInstance *MeshPayloadShMemSizeCase::createInstance(Context &context) const
1675 {
1676     const auto ctxParams = getParamsFromContext(context);
1677     SpecConstVector vec{ctxParams.payloadElements, ctxParams.sharedMemoryElements};
1678 
1679     return new PayloadShMemSizeInstance(context, m_params, std::move(vec));
1680 }
1681 
initPrograms(vk::SourceCollections & programCollection) const1682 void MeshPayloadShMemSizeCase::initPrograms(vk::SourceCollections &programCollection) const
1683 {
1684     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1685 
1686     const std::string scDecl = "layout (constant_id=0) const uint payloadElements = 1u;\n"
1687                                "layout (constant_id=1) const uint sharedMemoryElements = 1u;\n";
1688 
1689     const std::string dsDecl = "layout (set=0, binding=0, std430) buffer ResultBlock {\n"
1690                                "    uint sharedOK;\n"
1691                                "    uint payloadOK;\n"
1692                                "} result;\n";
1693 
1694     std::string taskData;
1695     std::string taskPayloadBody;
1696     std::string meshPayloadBody;
1697 
1698     if (m_params.hasPayload())
1699     {
1700         std::ostringstream taskDataStream;
1701         taskDataStream << "struct TaskData {\n"
1702                        << "    uint elements[payloadElements];\n"
1703                        << "};\n"
1704                        << "taskPayloadSharedEXT TaskData td;\n";
1705         taskData = taskDataStream.str();
1706 
1707         std::ostringstream taskBodyStream;
1708         taskBodyStream << "    const uint payloadElementsPerInvocation = uint(ceil(float(payloadElements) / float("
1709                        << kLocalInvocations << ")));\n"
1710                        << "    for (uint i = 0u; i < payloadElementsPerInvocation; ++i) {\n"
1711                        << "        const uint elemIdx = payloadElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1712                        << "        if (elemIdx < payloadElements) {\n"
1713                        << "            td.elements[elemIdx] = elemIdx + 2000u;\n"
1714                        << "        }\n"
1715                        << "    }\n"
1716                        << "\n";
1717         taskPayloadBody = taskBodyStream.str();
1718 
1719         std::ostringstream meshBodyStream;
1720         meshBodyStream << "    if (gl_LocalInvocationIndex == 0u) {\n"
1721                        << "        bool allOK = true;\n"
1722                        << "        for (uint i = 0u; i < payloadElements; ++i) {\n"
1723                        << "            if (td.elements[i] != i + 2000u) {\n"
1724                        << "                allOK = false;\n"
1725                        << "                break;\n"
1726                        << "            }\n"
1727                        << "        }\n"
1728                        << "        result.payloadOK = (allOK ? 1u : 0u);\n"
1729                        << "    }\n"
1730                        << "\n";
1731         meshPayloadBody = meshBodyStream.str();
1732     }
1733     else
1734     {
1735         meshPayloadBody = "    result.payloadOK = 1u;\n";
1736     }
1737 
1738     std::string sharedData;
1739     std::string meshSharedDataBody;
1740 
1741     if (m_params.hasSharedMemory())
1742     {
1743         sharedData = "shared uint sharedElements[sharedMemoryElements];\n";
1744 
1745         std::ostringstream bodyStream;
1746         bodyStream << "    const uint shMemElementsPerInvocation = uint(ceil(float(sharedMemoryElements) / float("
1747                    << kLocalInvocations << ")));\n"
1748                    << "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
1749                    << "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1750                    << "        if (elemIdx < sharedMemoryElements) {\n"
1751                    << "            sharedElements[elemIdx] = elemIdx * 2u + 1000u;\n" // Write
1752                    << "        }\n"
1753                    << "    }\n"
1754                    << "    memoryBarrierShared();\n"
1755                    << "    barrier();\n"
1756                    << "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
1757                    << "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1758                    << "        if (elemIdx < sharedMemoryElements) {\n"
1759                    << "            const uint accessIdx = sharedMemoryElements - 1u - elemIdx;\n"
1760                    << "            sharedElements[accessIdx] += accessIdx;\n" // Read+Write a different element.
1761                    << "        }\n"
1762                    << "    }\n"
1763                    << "    memoryBarrierShared();\n"
1764                    << "    barrier();\n"
1765                    << "    if (gl_LocalInvocationIndex == 0u) {\n"
1766                    << "        bool allOK = true;\n"
1767                    << "        for (uint i = 0u; i < sharedMemoryElements; ++i) {\n"
1768                    << "            if (sharedElements[i] != i*3u + 1000u) {\n"
1769                    << "                allOK = false;\n"
1770                    << "                break;\n"
1771                    << "            }\n"
1772                    << "        }\n"
1773                    << "        result.sharedOK = (allOK ? 1u : 0u);\n"
1774                    << "    }\n"
1775                    << "\n";
1776         meshSharedDataBody = bodyStream.str();
1777     }
1778     else
1779     {
1780         meshSharedDataBody = "    if (gl_LocalInvocationIndex == 0u) {\n"
1781                              "        result.sharedOK = 1u;\n"
1782                              "    }\n";
1783     }
1784 
1785     if (m_params.hasPayload())
1786     {
1787         std::ostringstream task;
1788         task << "#version 450\n"
1789              << "#extension GL_EXT_mesh_shader : enable\n"
1790              << "\n"
1791              << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1792              << scDecl << dsDecl << taskData << "\n"
1793              << "void main () {\n"
1794              << taskPayloadBody << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
1795              << "}\n";
1796         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1797     }
1798 
1799     std::ostringstream mesh;
1800     mesh << "#version 450\n"
1801          << "#extension GL_EXT_mesh_shader : enable\n"
1802          << "\n"
1803          << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1804          << "layout (triangles) out;\n"
1805          << "layout (max_vertices=3, max_primitives=1) out;\n"
1806          << scDecl << dsDecl << taskData << sharedData << "\n"
1807          << "void main () {\n"
1808          << meshSharedDataBody << meshPayloadBody << "    SetMeshOutputsEXT(0u, 0u);\n"
1809          << "}\n";
1810     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1811 }
1812 
// Kind of location selected by MaxMeshOutputParams::locationType.
enum class LocationType
{
    PER_VERTEX    = 0,
    PER_PRIMITIVE = 1,
};
1818 
// View index usage selected by MaxMeshOutputParams::viewIndexType. Any value other than
// NO_VIEW_INDEX makes the test a multiview test.
enum class ViewIndexType
{
    NO_VIEW_INDEX   = 0,
    VIEW_INDEX_FRAG = 1,
    VIEW_INDEX_BOTH = 2,
};
1825 
1826 struct MaxMeshOutputParams
1827 {
1828     bool usePayload;
1829     LocationType locationType;
1830     ViewIndexType viewIndexType;
1831 
isMultiViewvkt::MeshShader::__anon1a407c490111::MaxMeshOutputParams1832     bool isMultiView(void) const
1833     {
1834         return (viewIndexType != ViewIndexType::NO_VIEW_INDEX);
1835     }
1836 
viewIndexInMeshvkt::MeshShader::__anon1a407c490111::MaxMeshOutputParams1837     bool viewIndexInMesh(void) const
1838     {
1839         return (viewIndexType == ViewIndexType::VIEW_INDEX_BOTH);
1840     }
1841 };
1842 
1843 class MaxMeshOutputSizeCase : public vkt::TestCase
1844 {
1845 public:
MaxMeshOutputSizeCase(tcu::TestContext & testCtx,const std::string & name,const MaxMeshOutputParams & params)1846     MaxMeshOutputSizeCase(tcu::TestContext &testCtx, const std::string &name, const MaxMeshOutputParams &params)
1847         : vkt::TestCase(testCtx, name)
1848         , m_params(params)
1849     {
1850     }
~MaxMeshOutputSizeCase(void)1851     virtual ~MaxMeshOutputSizeCase(void)
1852     {
1853     }
1854 
1855     TestInstance *createInstance(Context &context) const override;
1856     void checkSupport(Context &context) const override;
1857     void initPrograms(vk::SourceCollections &programCollection) const override;
1858 
1859     // Small-ish numbers allow for more fine-grained control in the amount of memory, but it can't be too small or we hit the locations limit.
1860     static constexpr uint32_t kMaxPoints = 96u;
1861     static constexpr uint32_t kNumViews  = 2u; // For the multiView case.
1862 
1863 protected:
1864     static constexpr uint32_t kUvec4Size          = 16u; // We'll use 4 scalars at a time in the form of a uvec4.
1865     static constexpr uint32_t kUvec4Comp          = 4u;  // 4 components per uvec4.
1866     static constexpr uint32_t kPayloadElementSize = 4u;  // Each payload element will be a uint.
1867 
1868     struct ParamsFromContext
1869     {
1870         uint32_t payloadElements;
1871         uint32_t locationCount;
1872     };
1873     ParamsFromContext getParamsFromContext(Context &context) const;
1874 
1875     const MaxMeshOutputParams m_params;
1876 };
1877 
1878 class MaxMeshOutputSizeInstance : public SpecConstantInstance
1879 {
1880 public:
MaxMeshOutputSizeInstance(Context & context,SpecConstVector && vec,uint32_t numViews)1881     MaxMeshOutputSizeInstance(Context &context, SpecConstVector &&vec, uint32_t numViews)
1882         : SpecConstantInstance(context, std::move(vec))
1883         , m_numViews(numViews)
1884     {
1885     }
~MaxMeshOutputSizeInstance(void)1886     virtual ~MaxMeshOutputSizeInstance(void)
1887     {
1888     }
1889 
1890     tcu::TestStatus iterate(void) override;
1891 
1892 protected:
1893     Move<VkRenderPass> makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device, uint32_t layerCount,
1894                                             VkFormat format);
1895 
1896     const uint32_t m_numViews;
1897 };
1898 
checkSupport(Context & context) const1899 void MaxMeshOutputSizeCase::checkSupport(Context &context) const
1900 {
1901     checkTaskMeshShaderSupportEXT(context, m_params.usePayload /*requireTask*/, true /*requireMesh*/);
1902 
1903     if (m_params.isMultiView())
1904     {
1905         const auto &multiviewFeatures = context.getMultiviewFeatures();
1906         if (!multiviewFeatures.multiview)
1907             TCU_THROW(NotSupportedError, "Multiview not supported");
1908 
1909         const auto &meshFeatures = context.getMeshShaderFeaturesEXT();
1910         if (!meshFeatures.multiviewMeshShader)
1911             TCU_THROW(NotSupportedError, "Multiview not supported for mesh shaders");
1912 
1913         const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1914         if (meshProperties.maxMeshMultiviewViewCount < kNumViews)
1915             TCU_THROW(NotSupportedError, "maxMeshMultiviewViewCount too low");
1916     }
1917 }
1918 
getParamsFromContext(Context & context) const1919 MaxMeshOutputSizeCase::ParamsFromContext MaxMeshOutputSizeCase::getParamsFromContext(Context &context) const
1920 {
1921     const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1922     const auto maxOutSize =
1923         std::min(meshProperties.maxMeshOutputMemorySize, meshProperties.maxMeshPayloadAndOutputMemorySize);
1924     const auto maxMeshPayloadSize =
1925         std::min(meshProperties.maxMeshPayloadAndSharedMemorySize, meshProperties.maxMeshPayloadAndOutputMemorySize);
1926     const auto maxTaskPayloadSize =
1927         std::min(meshProperties.maxTaskPayloadSize, meshProperties.maxTaskPayloadAndSharedMemorySize);
1928     const auto maxPayloadSize = std::min(maxMeshPayloadSize, maxTaskPayloadSize);
1929     const auto numViewFactor  = (m_params.viewIndexInMesh() ? kNumViews : 1u);
1930 
1931     uint32_t payloadSize;
1932     uint32_t outSize;
1933 
1934     if (m_params.usePayload)
1935     {
1936         const auto totalMax = maxOutSize + maxPayloadSize;
1937 
1938         if (totalMax <= meshProperties.maxMeshPayloadAndOutputMemorySize)
1939         {
1940             payloadSize = maxPayloadSize;
1941             outSize     = maxOutSize;
1942         }
1943         else
1944         {
1945             payloadSize = maxPayloadSize;
1946             outSize     = meshProperties.maxMeshPayloadAndOutputMemorySize - payloadSize;
1947         }
1948     }
1949     else
1950     {
1951         payloadSize = 0u;
1952         outSize     = maxOutSize;
1953     }
1954 
1955     // This uses the equation in "Mesh Shader Output" spec section. Note per-vertex data already has gl_Position and gl_PointSize.
1956     // Also note gl_PointSize uses 1 effective location (4 scalar components) despite being a float.
1957     const auto granularity =
1958         ((m_params.locationType == LocationType::PER_PRIMITIVE) ? meshProperties.meshOutputPerPrimitiveGranularity :
1959                                                                   meshProperties.meshOutputPerVertexGranularity);
1960     const auto actualPoints      = de::roundUp(kMaxPoints, granularity);
1961     const auto sizeMultiplier    = actualPoints * kUvec4Size;
1962     const auto builtinDataSize   = (16u /*gl_Position*/ + 16u /*gl_PointSize*/) * actualPoints;
1963     const auto locationsDataSize = (outSize - builtinDataSize) / numViewFactor;
1964     const auto maxTotalLocations =
1965         meshProperties.maxMeshOutputComponents / kUvec4Comp - 2u; // gl_Position and gl_PointSize use 1 location each.
1966     const auto locationCount = std::min(locationsDataSize / sizeMultiplier, maxTotalLocations);
1967 
1968     ParamsFromContext params;
1969     params.payloadElements = payloadSize / kPayloadElementSize;
1970     params.locationCount   = locationCount;
1971 
1972     auto &log = context.getTestContext().getLog();
1973     {
1974         const auto actualOuputSize = builtinDataSize + locationCount * sizeMultiplier * numViewFactor;
1975 
1976         log << tcu::TestLog::Message << "Payload elements: " << params.payloadElements << tcu::TestLog::EndMessage;
1977         log << tcu::TestLog::Message << "Location count: " << params.locationCount << tcu::TestLog::EndMessage;
1978         log << tcu::TestLog::Message
1979             << "Max mesh payload and output size (bytes): " << meshProperties.maxMeshPayloadAndOutputMemorySize
1980             << tcu::TestLog::EndMessage;
1981         log << tcu::TestLog::Message << "Max output size (bytes): " << maxOutSize << tcu::TestLog::EndMessage;
1982         log << tcu::TestLog::Message << "Payload size (bytes): " << payloadSize << tcu::TestLog::EndMessage;
1983         log << tcu::TestLog::Message << "Output data size (bytes): " << actualOuputSize << tcu::TestLog::EndMessage;
1984         log << tcu::TestLog::Message << "Output + payload size (bytes): " << (payloadSize + actualOuputSize)
1985             << tcu::TestLog::EndMessage;
1986     }
1987 
1988     return params;
1989 }
1990 
createInstance(Context & context) const1991 TestInstance *MaxMeshOutputSizeCase::createInstance(Context &context) const
1992 {
1993     const auto ctxParams = getParamsFromContext(context);
1994     SpecConstVector specConstVec{ctxParams.payloadElements, ctxParams.locationCount};
1995     const auto numViews = (m_params.isMultiView() ? kNumViews : 1u);
1996 
1997     return new MaxMeshOutputSizeInstance(context, std::move(specConstVec), numViews);
1998 }
1999 
// Generates the GLSL sources for the test: an optional task shader (only when the payload is used), the mesh shader
// and the fragment shader. Array sizes in the shaders are given by the payloadElements and locationCount
// specialization constants.
//
// - The task shader fills the payload with known values and launches a single mesh workgroup.
// - The mesh shader verifies the payload (if any), emits kMaxPoints points along the X axis and writes known values
//   to every location of every point. Values depend on the point, the location, the payload verification result
//   and, in some variants, the view index.
// - The fragment shader recalculates the expected values for its point and outputs blue if they match or black if
//   they do not. In multiview variants, the view index is added to the red component.
void MaxMeshOutputSizeCase::initPrograms(vk::SourceCollections &programCollection) const
{
    const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
    // Qualifier applied to the location block in both the mesh and fragment shaders.
    const std::string locationQualifier =
        ((m_params.locationType == LocationType::PER_PRIMITIVE) ? "perprimitiveEXT" : "");
    const std::string multiViewExtDecl = "#extension GL_EXT_multiview : enable\n";

    // Specialization constants shared by all stages.
    const std::string scDecl = "layout (constant_id=0) const uint payloadElements = 1u;\n"
                               "layout (constant_id=1) const uint locationCount = 1u;\n";

    // Both taskPayload and the verification loop stay empty/trivial when no payload is used, in which case the mesh
    // shader always considers the payload to be OK.
    std::string taskPayload;
    std::string payloadVerification = "    bool payloadOK = true;\n";
    std::string locStruct           = "struct LocationBlock {\n"
                                      "    uvec4 elements[locationCount];\n"
                                      "};\n";

    if (m_params.usePayload)
    {
        taskPayload = "struct TaskData {\n"
                      "    uint elements[payloadElements];\n"
                      "};\n"
                      "taskPayloadSharedEXT TaskData td;\n";

        // Task shader: element i in the payload is set to 1000000 + i.
        std::ostringstream task;
        task << "#version 450\n"
             << "#extension GL_EXT_mesh_shader : enable\n"
             << "\n"
             << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
             << scDecl << taskPayload << "\n"
             << "void main (void) {\n"
             << "    for (uint i = 0; i < payloadElements; ++i) {\n"
             << "        td.elements[i] = 1000000u + i;\n"
             << "    }\n"
             << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
             << "}\n";
        programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;

        // Mesh shader code to check the payload contains the values written by the task shader.
        payloadVerification += "    for (uint i = 0; i < payloadElements; ++i) {\n"
                               "        if (td.elements[i] != 1000000u + i) {\n"
                               "            payloadOK = false;\n"
                               "            break;\n"
                               "        }\n"
                               "    }\n";
    }

    // Do values depend on view indices?
    const bool valFromViewIndex       = m_params.viewIndexInMesh();
    // Extra offset added to every component. The same expression is used in the mesh and fragment shaders.
    const std::string extraCompOffset = (valFromViewIndex ? "(4u * uint(gl_ViewIndex))" : "0u");

    {
        // The mesh shader only needs the multiview extension when it reads the view index.
        const std::string multiViewExt = (valFromViewIndex ? multiViewExtDecl : "");

        // Mesh shader: a single invocation writes all points and locations. The payloadOffset value is 10 only if
        // the payload was verified, which is what the fragment shader expects.
        std::ostringstream mesh;
        mesh << "#version 450\n"
             << "#extension GL_EXT_mesh_shader : enable\n"
             << multiViewExt << "\n"
             << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
             << "layout (points) out;\n"
             << "layout (max_vertices=" << kMaxPoints << ", max_primitives=" << kMaxPoints << ") out;\n"
             << "\n"
             << "out gl_MeshPerVertexEXT {\n"
             << "    vec4  gl_Position;\n"
             << "    float gl_PointSize;\n"
             << "} gl_MeshVerticesEXT[];\n"
             << "\n"
             << scDecl << taskPayload << "\n"
             << locStruct << "layout (location=0) out " << locationQualifier << " LocationBlock loc[];\n"
             << "\n"
             << "void main (void) {\n"
             << payloadVerification << "\n"
             << "    SetMeshOutputsEXT(" << kMaxPoints << ", " << kMaxPoints << ");\n"
             << "    const uint payloadOffset = (payloadOK ? 10u : 0u);\n"
             << "    const uint compOffset = " << extraCompOffset << ";\n"
             << "    for (uint pointIdx = 0u; pointIdx < " << kMaxPoints << "; ++pointIdx) {\n"
             << "        const float xCoord = ((float(pointIdx) + 0.5) / float(" << kMaxPoints << ")) * 2.0 - 1.0;\n"
             << "        gl_MeshVerticesEXT[pointIdx].gl_Position = vec4(xCoord, 0.0, 0.0, 1.0);\n"
             << "        gl_MeshVerticesEXT[pointIdx].gl_PointSize = 1.0f;\n"
             << "        gl_PrimitivePointIndicesEXT[pointIdx] = pointIdx;\n"
             << "        for (uint elemIdx = 0; elemIdx < locationCount; ++elemIdx) {\n"
             << "            const uint baseVal = 200000000u + 100000u * pointIdx + 1000u * elemIdx + payloadOffset;\n"
             << "            loc[pointIdx].elements[elemIdx] = uvec4(baseVal + 1u + compOffset, baseVal + 2u + "
                "compOffset, baseVal + 3u + compOffset, baseVal + 4u + compOffset);\n"
             << "        }\n"
             << "    }\n"
             << "}\n";
        programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
    }

    {
        // The fragment shader reads the view index in every multiview variant.
        const std::string multiViewExt = (m_params.isMultiView() ? multiViewExtDecl : "");
        const std::string outColorMod  = (m_params.isMultiView() ? "    outColor.r += float(gl_ViewIndex);\n" : "");

        // Fragment shader: the point index is taken from the X coordinate of the fragment and the expected values
        // are calculated with the same formula used in the mesh shader.
        std::ostringstream frag;
        frag << "#version 450\n"
             << "#extension GL_EXT_mesh_shader : enable\n"
             << multiViewExt << "\n"
             << "layout (location=0) out vec4 outColor;\n"
             << scDecl << locStruct << "layout (location=0) in flat " << locationQualifier << " LocationBlock loc;\n"
             << "\n"
             << "void main (void) {\n"
             << "    bool pointOK = true;\n"
             << "    const uint pointIdx = uint(gl_FragCoord.x);\n"
             << "    const uint expectedPayloadOffset = 10u;\n"
             << "    const uint compOffset = " << extraCompOffset << ";\n"
             << "    for (uint elemIdx = 0; elemIdx < locationCount; ++elemIdx) {\n"
             << "        const uint baseVal = 200000000u + 100000u * pointIdx + 1000u * elemIdx + "
                "expectedPayloadOffset;\n"
             << "        const uvec4 expectedVal = uvec4(baseVal + 1u + compOffset, baseVal + 2u + compOffset, baseVal "
                "+ 3u + compOffset, baseVal + 4u + compOffset);\n"
             << "        if (loc.elements[elemIdx] != expectedVal) {\n"
             << "            pointOK = false;\n"
             << "            break;\n"
             << "        }\n"
             << "    }\n"
             << "    const vec4 okColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
             << "    const vec4 failColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
             << "    outColor = (pointOK ? okColor : failColor);\n"
             << outColorMod << "}\n";
        programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
    }
}
2121 
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device,uint32_t layerCount,VkFormat format)2122 Move<VkRenderPass> MaxMeshOutputSizeInstance::makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device,
2123                                                                    uint32_t layerCount, VkFormat format)
2124 {
2125     DE_ASSERT(layerCount > 0u);
2126 
2127     const VkAttachmentDescription colorAttachmentDescription = {
2128         0u,                                       // VkAttachmentDescriptionFlags    flags
2129         format,                                   // VkFormat                        format
2130         VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits           samples
2131         VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp              loadOp
2132         VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp             storeOp
2133         VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp              stencilLoadOp
2134         VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp             stencilStoreOp
2135         VK_IMAGE_LAYOUT_UNDEFINED,                // VkImageLayout                   initialLayout
2136         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout                   finalLayout
2137     };
2138 
2139     const VkAttachmentReference colorAttachmentRef =
2140         makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
2141 
2142     const VkSubpassDescription subpassDescription = {
2143         0u,                              // VkSubpassDescriptionFlags       flags
2144         VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint             pipelineBindPoint
2145         0u,                              // uint32_t                        inputAttachmentCount
2146         nullptr,                         // const VkAttachmentReference*    pInputAttachments
2147         1u,                              // uint32_t                        colorAttachmentCount
2148         &colorAttachmentRef,             // const VkAttachmentReference*    pColorAttachments
2149         nullptr,                         // const VkAttachmentReference*    pResolveAttachments
2150         nullptr,                         // const VkAttachmentReference*    pDepthStencilAttachment
2151         0u,                              // uint32_t                        preserveAttachmentCount
2152         nullptr                          // const uint32_t*                 pPreserveAttachments
2153     };
2154 
2155     const uint32_t viewMask                                   = ((1u << layerCount) - 1u);
2156     const VkRenderPassMultiviewCreateInfo multiviewCreateInfo = {
2157         VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, // VkStructureType sType;
2158         nullptr,                                             // const void* pNext;
2159         1u,                                                  // uint32_t subpassCount;
2160         &viewMask,                                           // const uint32_t* pViewMasks;
2161         0u,                                                  // uint32_t dependencyCount;
2162         nullptr,                                             // const int32_t* pViewOffsets;
2163         1u,                                                  // uint32_t correlationMaskCount;
2164         &viewMask,                                           // const uint32_t* pCorrelationMasks;
2165     };
2166 
2167     const void *pNext = ((layerCount > 1u) ? &multiviewCreateInfo : nullptr);
2168 
2169     const VkRenderPassCreateInfo renderPassInfo = {
2170         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType                   sType
2171         pNext,                                     // const void*                       pNext
2172         0u,                                        // VkRenderPassCreateFlags           flags
2173         1u,                                        // uint32_t                          attachmentCount
2174         &colorAttachmentDescription,               // const VkAttachmentDescription*    pAttachments
2175         1u,                                        // uint32_t                          subpassCount
2176         &subpassDescription,                       // const VkSubpassDescription*       pSubpasses
2177         0u,                                        // uint32_t                          dependencyCount
2178         nullptr,                                   // const VkSubpassDependency*        pDependencies
2179     };
2180 
2181     return createRenderPass(vkd, device, &renderPassInfo);
2182 }
2183 
// Runs the test: draws a single mesh (or task) workgroup to a color attachment with one layer per view, copies the
// attachment to a host-visible buffer and compares it to a reference image.
//
// The reference for each layer is the expected color with the red component replaced by the layer index. The test
// fails through TCU_FAIL if the result does not match exactly (zero threshold); otherwise it returns a pass status.
tcu::TestStatus MaxMeshOutputSizeInstance::iterate(void)
{
    const auto &vkd       = m_context.getDeviceInterface();
    const auto device     = m_context.getDevice();
    auto &alloc           = m_context.getDefaultAllocator();
    const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
    const auto queue      = m_context.getUniversalQueue();

    const auto colorFormat    = VK_FORMAT_R8G8B8A8_UNORM;
    const auto tcuColorFormat = mapVkFormat(colorFormat);
    const auto pixelSize      = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
    const auto colorUsage     = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
    const auto imageViewType  = ((m_numViews > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
    // The framebuffer is one row with as many pixels as points.
    const auto fbExtent       = makeExtent3D(MaxMeshOutputSizeCase::kMaxPoints, 1u, 1u);
    // Extent used for the result and reference accesses: layers are stored in the Z dimension.
    const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
                               static_cast<int>(m_numViews));
    const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    const tcu::Vec4 expectedColor(0.0f, 0.0f, 1.0f, 1.0f);
    const tcu::Vec4 colorThreshold(0.0f, 0.0f, 0.0f, 0.0f);

    // Create color attachment.
    const VkImageCreateInfo colorAttachmentCreatInfo = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        nullptr,                             // const void* pNext;
        0u,                                  // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
        colorFormat,                         // VkFormat format;
        fbExtent,                            // VkExtent3D extent;
        1u,                                  // uint32_t mipLevels;
        m_numViews,                          // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
        colorUsage,                          // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
        0u,                                  // uint32_t queueFamilyIndexCount;
        nullptr,                             // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
    };
    ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
    // Subresource range and layers covering every layer of the single mip level.
    const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, m_numViews);
    const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, m_numViews);
    const auto colorAttachmentView =
        makeImageView(vkd, device, colorAttachment.get(), imageViewType, colorFormat, colorSRR);

    // Verification buffer for the color attachment.
    DE_ASSERT(fbExtent.depth == 1u);
    const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    const auto verificationBufferSize =
        static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * m_numViews);
    const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
    BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
                                        MemoryRequirement::HostVisible);
    auto &verificationBufferAlloc = verificationBuffer.getAllocation();
    void *verificationBufferData  = verificationBufferAlloc.getHostPtr();

    // Zero-out the buffer from the host before it is used as the copy destination.
    deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));

    // No descriptors or push constants are used. The framebuffer is created with a single layer even when the
    // attachment view has several: the render pass uses multiview in that case.
    const auto pipelineLayout = makePipelineLayout(vkd, device);
    const auto renderPass     = makeCustomRenderPass(vkd, device, m_numViews, colorFormat);
    const auto framebuffer =
        makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);

    // The task shader is optional: it is only used when present in the binary collection.
    const auto &binaries  = m_context.getBinaryCollection();
    const bool hasTask    = binaries.contains("task");
    const auto taskModule = (hasTask ? createShaderModule(vkd, device, binaries.get("task")) : Move<VkShaderModule>());
    const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
    const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));

    const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
    const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));

    // Specialization constants, shared by every stage in the pipeline.
    const auto specMap                  = makeSpecializationMap();
    const VkSpecializationInfo specInfo = {
        static_cast<uint32_t>(specMap.size()), // uint32_t mapEntryCount;
        de::dataOrNull(specMap),               // const VkSpecializationMapEntry* pMapEntries;
        de::dataSize(m_specConstants),         // size_t dataSize;
        de::dataOrNull(m_specConstants),       // const void* pData;
    };

    // Template for the shader stages: the stage and module members are overwritten before adding each stage.
    std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
    VkPipelineShaderStageCreateInfo stageInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
        nullptr,                                             // const void* pNext;
        0u,                                                  // VkPipelineShaderStageCreateFlags flags;
        VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,                  // VkShaderStageFlagBits stage;
        DE_NULL,                                             // VkShaderModule module;
        "main",                                              // const char* pName;
        &specInfo,                                           // const VkSpecializationInfo* pSpecializationInfo;
    };

    if (hasTask)
    {
        stageInfo.stage  = VK_SHADER_STAGE_TASK_BIT_EXT;
        stageInfo.module = taskModule.get();
        shaderStages.push_back(stageInfo);
    }

    {
        stageInfo.stage  = VK_SHADER_STAGE_MESH_BIT_EXT;
        stageInfo.module = meshModule.get();
        shaderStages.push_back(stageInfo);
    }

    {
        stageInfo.stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
        stageInfo.module = fragModule.get();
        shaderStages.push_back(stageInfo);
    }

    const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
                                               renderPass.get(), viewports, scissors);

    const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
    const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer    = cmdBufferPtr.get();

    // Draw a single workgroup.
    beginCommandBuffer(vkd, cmdBuffer);
    beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
    vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
    vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
    endRenderPass(vkd, cmdBuffer);

    // Make color attachment writes available to the transfer and move the image to the transfer source layout.
    const auto preTransferBarrier = makeImageMemoryBarrier(
        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
    cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                                  VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);

    // Copy every layer of the color attachment to the verification buffer.
    const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
    vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                             verificationBuffer.get(), 1u, &copyRegion);

    // Make the copy results visible to host reads.
    const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
    cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                             &postTransferBarrier);

    endCommandBuffer(vkd, cmdBuffer);
    submitCommandsAndWait(vkd, device, queue, cmdBuffer);

    // Build the reference image and compare it to the result.
    invalidateAlloc(vkd, device, verificationBufferAlloc);
    tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
    tcu::TextureLevel referenceLevel(tcuColorFormat, iExtent3D.x(), iExtent3D.y(), iExtent3D.z());
    tcu::PixelBufferAccess referenceAccess = referenceLevel.getAccess();

    for (int z = 0; z < iExtent3D.z(); ++z)
    {
        // The expected red component in each layer is the layer index.
        const auto layer = tcu::getSubregion(referenceAccess, 0, 0, z, iExtent3D.x(), iExtent3D.y(), 1);
        const tcu::Vec4 expectedLayerColor(static_cast<float>(z), expectedColor.y(), expectedColor.z(),
                                           expectedColor.w());
        tcu::clear(layer, expectedLayerColor);
    }

    auto &log = m_context.getTestContext().getLog();
    if (!tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, colorThreshold,
                                    tcu::COMPARE_LOG_ON_ERROR))
        TCU_FAIL("Check log for details");

    return tcu::TestStatus::pass("Pass");
}
2343 
2344 } // namespace
2345 
createMeshShaderPropertyTestsEXT(tcu::TestContext & testCtx)2346 tcu::TestCaseGroup *createMeshShaderPropertyTestsEXT(tcu::TestContext &testCtx)
2347 {
2348     using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
2349 
2350     // Tests checking mesh shading properties
2351     GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "properties"));
2352 
2353     const struct
2354     {
2355         PayLoadShMemSizeType testType;
2356         const char *name;
2357     } taskPayloadShMemCases[] = {
2358         {PayLoadShMemSizeType::PAYLOAD, "task_payload_size"},
2359         {PayLoadShMemSizeType::SHARED_MEMORY, "task_shared_memory_size"},
2360         {PayLoadShMemSizeType::BOTH, "task_payload_and_shared_memory_size"},
2361     };
2362 
2363     for (const auto &taskPayloadShMemCase : taskPayloadShMemCases)
2364     {
2365         const TaskPayloadShMemSizeParams params{taskPayloadShMemCase.testType};
2366         mainGroup->addChild(new TaskPayloadShMemSizeCase(testCtx, taskPayloadShMemCase.name, params));
2367     }
2368 
2369     mainGroup->addChild(new MaxViewIndexCase(testCtx, "max_view_index"));
2370     mainGroup->addChild(new MaxOutputLayersCase(testCtx, "max_output_layers"));
2371 
2372     const struct
2373     {
2374         MaxPrimVertType limitPrimVertType;
2375         const char *prefix;
2376     } limitPrimVertCases[] = {
2377         {MaxPrimVertType::PRIMITIVES, "max_mesh_output_primitives_"},
2378         {MaxPrimVertType::VERTICES, "max_mesh_output_vertices_"},
2379     };
2380 
2381     const uint32_t itemCounts[] = {256u, 512u, 1024u, 2048u};
2382 
2383     for (const auto &primVertCase : limitPrimVertCases)
2384     {
2385         for (const auto &count : itemCounts)
2386         {
2387             const MaxPrimVertParams params{primVertCase.limitPrimVertType, count};
2388             mainGroup->addChild(
2389                 new MaxMeshOutputPrimVertCase(testCtx, primVertCase.prefix + std::to_string(count), params));
2390         }
2391     }
2392 
2393     mainGroup->addChild(new MaxMeshOutputComponentsCase(testCtx, "max_mesh_output_components"));
2394 
2395     const struct
2396     {
2397         PayLoadShMemSizeType testType;
2398         const char *name;
2399     } meshPayloadShMemCases[] = {
2400         // No actual property for the first one, combines the two properties involving payload size.
2401         {PayLoadShMemSizeType::PAYLOAD, "mesh_payload_size"},
2402         {PayLoadShMemSizeType::SHARED_MEMORY, "mesh_shared_memory_size"},
2403         {PayLoadShMemSizeType::BOTH, "mesh_payload_and_shared_memory_size"},
2404     };
2405     for (const auto &meshPayloadShMemCase : meshPayloadShMemCases)
2406     {
2407         const MeshPayloadShMemSizeParams params{meshPayloadShMemCase.testType};
2408         mainGroup->addChild(new MeshPayloadShMemSizeCase(testCtx, meshPayloadShMemCase.name, params));
2409     }
2410 
2411     const struct
2412     {
2413         bool usePayload;
2414         const char *suffix;
2415     } meshOutputPayloadCases[] = {
2416         {false, "_without_payload"},
2417         {true, "_with_payload"},
2418     };
2419 
2420     const struct
2421     {
2422         LocationType locationType;
2423         const char *suffix;
2424     } locationTypeCases[] = {
2425         {LocationType::PER_PRIMITIVE, "_per_primitive"},
2426         {LocationType::PER_VERTEX, "_per_vertex"},
2427     };
2428 
2429     const struct
2430     {
2431         ViewIndexType viewIndexType;
2432         const char *suffix;
2433     } multiviewCases[] = {
2434         {ViewIndexType::NO_VIEW_INDEX, "_no_view_index"},
2435         {ViewIndexType::VIEW_INDEX_FRAG, "_view_index_in_frag"},
2436         {ViewIndexType::VIEW_INDEX_BOTH, "_view_index_in_mesh_and_frag"},
2437     };
2438 
2439     for (const auto &meshOutputPayloadCase : meshOutputPayloadCases)
2440     {
2441         for (const auto &locationTypeCase : locationTypeCases)
2442         {
2443             for (const auto &multiviewCase : multiviewCases)
2444             {
2445                 const std::string name = std::string("max_mesh_output_size") + meshOutputPayloadCase.suffix +
2446                                          locationTypeCase.suffix + multiviewCase.suffix;
2447                 const MaxMeshOutputParams params = {
2448                     meshOutputPayloadCase.usePayload, // bool usePayload;
2449                     locationTypeCase.locationType,    // LocationType locationType;
2450                     multiviewCase.viewIndexType,      // ViewIndexType viewIndexType;
2451                 };
2452 
2453                 mainGroup->addChild(new MaxMeshOutputSizeCase(testCtx, name, params));
2454             }
2455         }
2456     }
2457 
2458     return mainGroup.release();
2459 }
2460 } // namespace MeshShader
2461 } // namespace vkt
2462