1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2022 The Khronos Group Inc.
6 * Copyright (c) 2022 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Mesh Shader Property Tests for VK_EXT_mesh_shader
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMeshShaderPropertyTestsEXT.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktMeshShaderUtil.hpp"
28
29 #include "vkBufferWithMemory.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkBarrierUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageWithMemory.hpp"
36 #include "vkImageUtil.hpp"
37
38 #include "tcuTestLog.hpp"
39 #include "tcuImageCompare.hpp"
40 #include "tcuTextureUtil.hpp"
41
42 #include "deUniquePtr.hpp"
43
44 #include <algorithm>
45 #include <sstream>
46 #include <limits>
47
48 namespace vkt
49 {
50 namespace MeshShader
51 {
52
53 using namespace vk;
54
55 namespace
56 {
57
// Selects which task-stage storage a test variant exercises.
enum class PayLoadShMemSizeType
{
    PAYLOAD       = 0, // Task payload only.
    SHARED_MEMORY = 1, // Task shared memory only.
    BOTH          = 2, // Payload and shared memory together.
};
64
65 struct PayloadShMemSizeParams
66 {
67 PayLoadShMemSizeType testType;
68
hasPayloadvkt::MeshShader::__anon1a407c490111::PayloadShMemSizeParams69 bool hasPayload(void) const
70 {
71 return testType != PayLoadShMemSizeType::SHARED_MEMORY;
72 }
hasSharedMemoryvkt::MeshShader::__anon1a407c490111::PayloadShMemSizeParams73 bool hasSharedMemory(void) const
74 {
75 return testType != PayLoadShMemSizeType::PAYLOAD;
76 }
77 };
78
79 using TaskPayloadShMemSizeParams = PayloadShMemSizeParams;
80 using MeshPayloadShMemSizeParams = PayloadShMemSizeParams;
81 using SpecConstVector = std::vector<uint32_t>;
82
83 class TaskPayloadShMemSizeCase : public vkt::TestCase
84 {
85 public:
TaskPayloadShMemSizeCase(tcu::TestContext & testCtx,const std::string & name,const TaskPayloadShMemSizeParams & params)86 TaskPayloadShMemSizeCase(tcu::TestContext &testCtx, const std::string &name,
87 const TaskPayloadShMemSizeParams ¶ms)
88 : vkt::TestCase(testCtx, name)
89 , m_params(params)
90 {
91 }
~TaskPayloadShMemSizeCase(void)92 virtual ~TaskPayloadShMemSizeCase(void)
93 {
94 }
95
96 void checkSupport(Context &context) const override;
97 void initPrograms(vk::SourceCollections &programCollection) const override;
98 TestInstance *createInstance(Context &context) const override;
99
100 protected:
101 // These depend on the context because we need the mesh shading properties to calculate them.
102 struct ParamsFromContext
103 {
104 uint32_t payloadElements;
105 uint32_t sharedMemoryElements;
106 };
107
108 ParamsFromContext getParamsFromContext(Context &context) const;
109
110 const TaskPayloadShMemSizeParams m_params;
111
112 static constexpr uint32_t kElementSize = static_cast<uint32_t>(sizeof(uint32_t));
113 static constexpr uint32_t kLocalInvocations = 128u;
114 };
115
116 class SpecConstantInstance : public vkt::TestInstance
117 {
118 public:
SpecConstantInstance(Context & context,SpecConstVector && vec)119 SpecConstantInstance(Context &context, SpecConstVector &&vec)
120 : vkt::TestInstance(context)
121 , m_specConstants(std::move(vec))
122 {
123 }
~SpecConstantInstance(void)124 virtual ~SpecConstantInstance(void)
125 {
126 }
127
128 protected:
129 std::vector<VkSpecializationMapEntry> makeSpecializationMap(void) const;
130 const SpecConstVector m_specConstants;
131 };
132
makeSpecializationMap(void) const133 std::vector<VkSpecializationMapEntry> SpecConstantInstance::makeSpecializationMap(void) const
134 {
135 std::vector<VkSpecializationMapEntry> entryMap;
136 entryMap.reserve(m_specConstants.size());
137
138 const auto constantSize = sizeof(uint32_t);
139 const auto csU32 = static_cast<uint32_t>(constantSize);
140
141 for (size_t i = 0u; i < m_specConstants.size(); ++i)
142 {
143 const auto id = static_cast<uint32_t>(i);
144
145 const VkSpecializationMapEntry entry = {
146 id, // uint32_t constantID;
147 (csU32 * id), // uint32_t offset;
148 constantSize, // size_t size;
149 };
150 entryMap.push_back(entry);
151 }
152
153 return entryMap;
154 }
155
156 class PayloadShMemSizeInstance : public SpecConstantInstance
157 {
158 public:
PayloadShMemSizeInstance(Context & context,const TaskPayloadShMemSizeParams & params,SpecConstVector && vec)159 PayloadShMemSizeInstance(Context &context, const TaskPayloadShMemSizeParams ¶ms, SpecConstVector &&vec)
160 : SpecConstantInstance(context, std::move(vec))
161 , m_params(params)
162 {
163 }
~PayloadShMemSizeInstance(void)164 virtual ~PayloadShMemSizeInstance(void)
165 {
166 }
167
168 tcu::TestStatus iterate(void) override;
169
170 protected:
171 Move<VkRenderPass> makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device);
172 const TaskPayloadShMemSizeParams m_params;
173 };
174
checkSupport(Context & context) const175 void TaskPayloadShMemSizeCase::checkSupport(Context &context) const
176 {
177 checkTaskMeshShaderSupportEXT(context, true /*requireTask*/, true /*requireMesh*/);
178 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
179
180 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
181 const auto minSize = kLocalInvocations * kElementSize;
182
183 // Note: the min required values for these properties in the spec would pass these checks.
184
185 if (meshProperties.maxTaskPayloadSize < minSize)
186 TCU_FAIL("Invalid maxTaskPayloadSize");
187
188 if (meshProperties.maxTaskSharedMemorySize < minSize)
189 TCU_FAIL("Invalid maxTaskSharedMemorySize");
190
191 if (meshProperties.maxTaskPayloadAndSharedMemorySize < minSize)
192 TCU_FAIL("Invalid maxTaskPayloadAndSharedMemorySize");
193
194 if (meshProperties.maxMeshPayloadAndSharedMemorySize < minSize)
195 TCU_FAIL("Invalid maxMeshPayloadAndSharedMemorySize");
196 }
197
getParamsFromContext(Context & context) const198 TaskPayloadShMemSizeCase::ParamsFromContext TaskPayloadShMemSizeCase::getParamsFromContext(Context &context) const
199 {
200 ParamsFromContext params;
201
202 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
203 const auto maxMeshPayloadSize =
204 std::min(meshProperties.maxMeshPayloadAndOutputMemorySize, meshProperties.maxMeshPayloadAndSharedMemorySize);
205 const auto maxPayloadElements =
206 std::min(meshProperties.maxTaskPayloadSize / kElementSize, maxMeshPayloadSize / kElementSize);
207 const auto maxShMemElements = meshProperties.maxTaskSharedMemorySize / kElementSize;
208 const auto maxTotalElements = meshProperties.maxTaskPayloadAndSharedMemorySize / kElementSize;
209
210 if (m_params.testType == PayLoadShMemSizeType::PAYLOAD)
211 {
212 params.sharedMemoryElements = 0u;
213 params.payloadElements = std::min(maxTotalElements, maxPayloadElements);
214 }
215 else if (m_params.testType == PayLoadShMemSizeType::SHARED_MEMORY)
216 {
217 params.payloadElements = 0u;
218 params.sharedMemoryElements = std::min(maxTotalElements, maxShMemElements);
219 }
220 else
221 {
222 uint32_t *minPtr;
223 uint32_t minVal;
224 uint32_t *maxPtr;
225 uint32_t maxVal;
226
227 // Divide them as evenly as possible getting them as closest as possible to maxTotalElements.
228 if (maxPayloadElements < maxShMemElements)
229 {
230 minPtr = ¶ms.payloadElements;
231 minVal = maxPayloadElements;
232
233 maxPtr = ¶ms.sharedMemoryElements;
234 maxVal = maxShMemElements;
235 }
236 else
237 {
238 minPtr = ¶ms.sharedMemoryElements;
239 minVal = maxShMemElements;
240
241 maxPtr = ¶ms.payloadElements;
242 maxVal = maxPayloadElements;
243 }
244
245 *minPtr = std::min(minVal, maxTotalElements / 2u);
246 *maxPtr = std::min(maxTotalElements - (*minPtr), maxVal);
247 }
248
249 return params;
250 }
251
createInstance(Context & context) const252 TestInstance *TaskPayloadShMemSizeCase::createInstance(Context &context) const
253 {
254 const auto ctxParams = getParamsFromContext(context);
255 SpecConstVector specConstVec{ctxParams.payloadElements, ctxParams.sharedMemoryElements};
256
257 return new PayloadShMemSizeInstance(context, m_params, std::move(specConstVec));
258 }
259
initPrograms(vk::SourceCollections & programCollection) const260 void TaskPayloadShMemSizeCase::initPrograms(vk::SourceCollections &programCollection) const
261 {
262 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
263
264 const std::string scDecl = "layout (constant_id=0) const uint payloadElements = 1u;\n"
265 "layout (constant_id=1) const uint sharedMemoryElements = 1u;\n";
266
267 const std::string dsDecl = "layout (set=0, binding=0, std430) buffer ResultBlock {\n"
268 " uint sharedOK;\n"
269 " uint payloadOK;\n"
270 "} result;\n";
271
272 std::string taskData;
273 std::string taskPayloadBody;
274 std::string meshPayloadBody;
275
276 if (m_params.hasPayload())
277 {
278 std::ostringstream taskDataStream;
279 taskDataStream << "struct TaskData {\n"
280 << " uint elements[payloadElements];\n"
281 << "};\n"
282 << "taskPayloadSharedEXT TaskData td;\n";
283 taskData = taskDataStream.str();
284
285 std::ostringstream taskBodyStream;
286 taskBodyStream << " const uint payloadElementsPerInvocation = uint(ceil(float(payloadElements) / float("
287 << kLocalInvocations << ")));\n"
288 << " for (uint i = 0u; i < payloadElementsPerInvocation; ++i) {\n"
289 << " const uint elemIdx = payloadElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
290 << " if (elemIdx < payloadElements) {\n"
291 << " td.elements[elemIdx] = elemIdx + 2000u;\n"
292 << " }\n"
293 << " }\n"
294 << "\n";
295 taskPayloadBody = taskBodyStream.str();
296
297 std::ostringstream meshBodyStream;
298 meshBodyStream << " bool allOK = true;\n"
299 << " for (uint i = 0u; i < payloadElements; ++i) {\n"
300 << " if (td.elements[i] != i + 2000u) {\n"
301 << " allOK = false;\n"
302 << " break;\n"
303 << " }\n"
304 << " }\n"
305 << " result.payloadOK = (allOK ? 1u : 0u);\n"
306 << "\n";
307 meshPayloadBody = meshBodyStream.str();
308 }
309 else
310 {
311 meshPayloadBody = " result.payloadOK = 1u;\n";
312 }
313
314 std::string sharedData;
315 std::string taskSharedDataBody;
316
317 if (m_params.hasSharedMemory())
318 {
319 sharedData = "shared uint sharedElements[sharedMemoryElements];\n";
320
321 std::ostringstream bodyStream;
322 bodyStream << " const uint shMemElementsPerInvocation = uint(ceil(float(sharedMemoryElements) / float("
323 << kLocalInvocations << ")));\n"
324 << " for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
325 << " const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
326 << " if (elemIdx < sharedMemoryElements) {\n"
327 << " sharedElements[elemIdx] = elemIdx * 2u + 1000u;\n" // Write
328 << " }\n"
329 << " }\n"
330 << " memoryBarrierShared();\n"
331 << " barrier();\n"
332 << " for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
333 << " const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
334 << " if (elemIdx < sharedMemoryElements) {\n"
335 << " const uint accessIdx = sharedMemoryElements - 1u - elemIdx;\n"
336 << " sharedElements[accessIdx] += accessIdx;\n" // Read+Write a different element.
337 << " }\n"
338 << " }\n"
339 << " memoryBarrierShared();\n"
340 << " barrier();\n"
341 << " if (gl_LocalInvocationIndex == 0u) {\n"
342 << " bool allOK = true;\n"
343 << " for (uint i = 0u; i < sharedMemoryElements; ++i) {\n"
344 << " if (sharedElements[i] != i*3u + 1000u) {\n"
345 << " allOK = false;\n"
346 << " break;\n"
347 << " }\n"
348 << " }\n"
349 << " result.sharedOK = (allOK ? 1u : 0u);\n"
350 << " }\n"
351 << "\n";
352 taskSharedDataBody = bodyStream.str();
353 }
354 else
355 {
356 taskSharedDataBody = " if (gl_LocalInvocationIndex == 0u) {\n"
357 " result.sharedOK = 1u;\n"
358 " }\n";
359 }
360
361 std::ostringstream task;
362 task << "#version 450\n"
363 << "#extension GL_EXT_mesh_shader : enable\n"
364 << "\n"
365 << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
366 << scDecl << dsDecl << taskData << sharedData << "\n"
367 << "void main () {\n"
368 << taskSharedDataBody << taskPayloadBody << " EmitMeshTasksEXT(1u, 1u, 1u);\n"
369 << "}\n";
370 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
371
372 std::ostringstream mesh;
373 mesh << "#version 450\n"
374 << "#extension GL_EXT_mesh_shader : enable\n"
375 << "\n"
376 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
377 << "layout (triangles) out;\n"
378 << "layout (max_vertices=3, max_primitives=1) out;\n"
379 << scDecl << dsDecl << taskData << "\n"
380 << "void main () {\n"
381 << meshPayloadBody << " SetMeshOutputsEXT(0u, 0u);\n"
382 << "}\n";
383 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
384 }
385
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device)386 Move<VkRenderPass> PayloadShMemSizeInstance::makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device)
387 {
388 const auto subpassDesc =
389 makeSubpassDescription(0u, VK_PIPELINE_BIND_POINT_GRAPHICS, 0u, nullptr, 0u, nullptr, 0u, nullptr, 0u, nullptr);
390 const auto dependency =
391 makeSubpassDependency(0u, 0u, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT,
392 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, 0u);
393
394 const VkRenderPassCreateInfo renderPassCreateInfo = {
395 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
396 nullptr, // const void* pNext;
397 0u, // VkRenderPassCreateFlags flags;
398 0u, // uint32_t attachmentCount;
399 nullptr, // const VkAttachmentDescription* pAttachments;
400 1u, // uint32_t subpassCount;
401 &subpassDesc, // const VkSubpassDescription* pSubpasses;
402 1u, // uint32_t dependencyCount;
403 &dependency, // const VkSubpassDependency* pDependencies;
404 };
405
406 return createRenderPass(vkd, device, &renderPassCreateInfo);
407 }
408
iterate(void)409 tcu::TestStatus PayloadShMemSizeInstance::iterate(void)
410 {
411 const auto &vkd = m_context.getDeviceInterface();
412 const auto device = m_context.getDevice();
413 auto &alloc = m_context.getDefaultAllocator();
414 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
415 const auto queue = m_context.getUniversalQueue();
416 const auto framebufferExtent = makeExtent2D(1u, 1u);
417 const auto pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
418
419 const auto resultsBufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t) * 2u);
420 const auto resultsBufferDescType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
421 const auto resultsBufferUsage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
422 const auto resultsBufferStages = (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT);
423 const auto resultsBufferCreateInfo = makeBufferCreateInfo(resultsBufferSize, resultsBufferUsage);
424 BufferWithMemory resultsBuffer(vkd, device, alloc, resultsBufferCreateInfo, MemoryRequirement::HostVisible);
425 auto &resultsBufferAlloc = resultsBuffer.getAllocation();
426 void *resultsBufferDataPtr = resultsBufferAlloc.getHostPtr();
427
428 deMemset(resultsBufferDataPtr, 0, static_cast<size_t>(resultsBufferSize));
429
430 DescriptorSetLayoutBuilder layoutBuilder;
431 layoutBuilder.addSingleBinding(resultsBufferDescType, resultsBufferStages);
432 const auto setLayout = layoutBuilder.build(vkd, device);
433 const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
434
435 DescriptorPoolBuilder poolBuilder;
436 poolBuilder.addType(resultsBufferDescType);
437 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
438 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
439
440 DescriptorSetUpdateBuilder updateBuilder;
441 const auto resultsBufferDescInfo = makeDescriptorBufferInfo(resultsBuffer.get(), 0ull, resultsBufferSize);
442 updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
443 resultsBufferDescType, &resultsBufferDescInfo);
444 updateBuilder.update(vkd, device);
445
446 const auto &binaries = m_context.getBinaryCollection();
447 const auto hasTask = binaries.contains("task");
448 const auto taskShader = (hasTask ? createShaderModule(vkd, device, binaries.get("task")) : Move<VkShaderModule>());
449 const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
450
451 const auto renderPass = makeCustomRenderPass(vkd, device);
452 const auto framebuffer =
453 makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, framebufferExtent.width, framebufferExtent.height);
454
455 const std::vector<VkViewport> viewports(1u, makeViewport(framebufferExtent));
456 const std::vector<VkRect2D> scissors(1u, makeRect2D(framebufferExtent));
457
458 const auto specMap = makeSpecializationMap();
459 const VkSpecializationInfo specInfo = {
460 static_cast<uint32_t>(specMap.size()), // uint32_t mapEntryCount;
461 de::dataOrNull(specMap), // const VkSpecializationMapEntry* pMapEntries;
462 de::dataSize(m_specConstants), // size_t dataSize;
463 de::dataOrNull(m_specConstants), // const void* pData;
464 };
465
466 std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
467 VkPipelineShaderStageCreateInfo stageInfo = {
468 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
469 nullptr, // const void* pNext;
470 0u, // VkPipelineShaderStageCreateFlags flags;
471 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
472 DE_NULL, // VkShaderModule module;
473 "main", // const char* pName;
474 &specInfo, // const VkSpecializationInfo* pSpecializationInfo;
475 };
476
477 if (hasTask)
478 {
479 stageInfo.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
480 stageInfo.module = taskShader.get();
481 shaderStages.push_back(stageInfo);
482 }
483
484 {
485 stageInfo.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
486 stageInfo.module = meshShader.get();
487 shaderStages.push_back(stageInfo);
488 }
489
490 const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
491 renderPass.get(), viewports, scissors);
492
493 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
494 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
495 const auto cmdBuffer = cmdBufferPtr.get();
496
497 beginCommandBuffer(vkd, cmdBuffer);
498 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
499 vkd.cmdBindPipeline(cmdBuffer, pipelineBindPoint, pipeline.get());
500 vkd.cmdBindDescriptorSets(cmdBuffer, pipelineBindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u,
501 nullptr);
502 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
503 endRenderPass(vkd, cmdBuffer);
504 {
505 const auto writeToHost = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
506 const auto writeStages = (VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT);
507 cmdPipelineMemoryBarrier(vkd, cmdBuffer, writeStages, VK_PIPELINE_STAGE_HOST_BIT, &writeToHost);
508 }
509 endCommandBuffer(vkd, cmdBuffer);
510 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
511
512 invalidateAlloc(vkd, device, resultsBufferAlloc);
513 struct
514 {
515 uint32_t sharedOK;
516 uint32_t payloadOK;
517 } resultData;
518 deMemcpy(&resultData, resultsBufferDataPtr, sizeof(resultData));
519
520 if (resultData.sharedOK != 1u)
521 TCU_FAIL("Unexpected shared memory result: " + std::to_string(resultData.sharedOK));
522
523 if (resultData.payloadOK != 1u)
524 TCU_FAIL("Unexpected payload result: " + std::to_string(resultData.payloadOK));
525
526 return tcu::TestStatus::pass("Pass");
527 }
528
529 class MaxViewIndexCase : public vkt::TestCase
530 {
531 public:
MaxViewIndexCase(tcu::TestContext & testCtx,const std::string & name)532 MaxViewIndexCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
533 {
534 }
~MaxViewIndexCase(void)535 virtual ~MaxViewIndexCase(void)
536 {
537 }
538
539 void checkSupport(Context &context) const override;
540 void initPrograms(vk::SourceCollections &programCollection) const override;
541 TestInstance *createInstance(Context &context) const override;
542 };
543
544 class MaxViewIndexInstance : public vkt::TestInstance
545 {
546 public:
MaxViewIndexInstance(Context & context)547 MaxViewIndexInstance(Context &context) : vkt::TestInstance(context)
548 {
549 }
~MaxViewIndexInstance(void)550 virtual ~MaxViewIndexInstance(void)
551 {
552 }
553
554 tcu::TestStatus iterate(void) override;
555 Move<VkRenderPass> makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device, uint32_t layerCount,
556 VkFormat format);
557
558 static constexpr uint32_t kMaxViews = 32u;
559 };
560
checkSupport(Context & context) const561 void MaxViewIndexCase::checkSupport(Context &context) const
562 {
563 checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
564
565 const auto &multiviewFeatures = context.getMultiviewFeatures();
566 if (!multiviewFeatures.multiview)
567 TCU_THROW(NotSupportedError, "Multiview not supported");
568
569 const auto &meshFeatures = context.getMeshShaderFeaturesEXT();
570 if (!meshFeatures.multiviewMeshShader)
571 TCU_THROW(NotSupportedError, "Multiview not supported for mesh shaders");
572 }
573
initPrograms(vk::SourceCollections & programCollection) const574 void MaxViewIndexCase::initPrograms(vk::SourceCollections &programCollection) const
575 {
576 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
577
578 std::ostringstream mesh;
579 mesh << "#version 450\n"
580 << "#extension GL_EXT_mesh_shader : enable\n"
581 << "\n"
582 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
583 << "layout (triangles) out;\n"
584 << "layout (max_vertices=3, max_primitives=1) out;\n"
585 << "\n"
586 << "void main (void) {\n"
587 << " SetMeshOutputsEXT(3u, 1u);\n"
588 << "\n"
589 << " gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
590 << " gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0, 3.0, 0.0, 1.0);\n"
591 << " gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
592 << " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
593 << "}\n";
594 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
595
596 std::ostringstream frag;
597 frag << "#version 450\n"
598 << "#extension GL_EXT_mesh_shader : enable\n"
599 << "#extension GL_EXT_multiview : enable\n"
600 << "\n"
601 << "layout (location=0) out uvec4 outColor;\n"
602 << "\n"
603 << "void main (void) {\n"
604 << " outColor = uvec4(uint(gl_ViewIndex) + 1u, 0, 0, 0);\n"
605 << "}\n";
606 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
607 }
608
createInstance(Context & context) const609 TestInstance *MaxViewIndexCase::createInstance(Context &context) const
610 {
611 return new MaxViewIndexInstance(context);
612 }
613
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device,uint32_t layerCount,VkFormat format)614 Move<VkRenderPass> MaxViewIndexInstance::makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device,
615 uint32_t layerCount, VkFormat format)
616 {
617 DE_ASSERT(layerCount > 0u);
618
619 const VkAttachmentDescription colorAttachmentDescription = {
620 0u, // VkAttachmentDescriptionFlags flags
621 format, // VkFormat format
622 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples
623 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp
624 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp
625 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp
626 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp
627 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout
628 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout
629 };
630
631 const VkAttachmentReference colorAttachmentRef =
632 makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
633
634 const VkSubpassDescription subpassDescription = {
635 0u, // VkSubpassDescriptionFlags flags
636 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint
637 0u, // uint32_t inputAttachmentCount
638 nullptr, // const VkAttachmentReference* pInputAttachments
639 1u, // uint32_t colorAttachmentCount
640 &colorAttachmentRef, // const VkAttachmentReference* pColorAttachments
641 nullptr, // const VkAttachmentReference* pResolveAttachments
642 nullptr, // const VkAttachmentReference* pDepthStencilAttachment
643 0u, // uint32_t preserveAttachmentCount
644 nullptr // const uint32_t* pPreserveAttachments
645 };
646
647 const uint32_t viewMask = ((1u << layerCount) - 1u);
648 const VkRenderPassMultiviewCreateInfo multiviewCreateInfo = {
649 VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, // VkStructureType sType;
650 nullptr, // const void* pNext;
651 1u, // uint32_t subpassCount;
652 &viewMask, // const uint32_t* pViewMasks;
653 0u, // uint32_t dependencyCount;
654 nullptr, // const int32_t* pViewOffsets;
655 1u, // uint32_t correlationMaskCount;
656 &viewMask, // const uint32_t* pCorrelationMasks;
657 };
658
659 const VkRenderPassCreateInfo renderPassInfo = {
660 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType
661 &multiviewCreateInfo, // const void* pNext
662 0u, // VkRenderPassCreateFlags flags
663 1u, // uint32_t attachmentCount
664 &colorAttachmentDescription, // const VkAttachmentDescription* pAttachments
665 1u, // uint32_t subpassCount
666 &subpassDescription, // const VkSubpassDescription* pSubpasses
667 0u, // uint32_t dependencyCount
668 nullptr, // const VkSubpassDependency* pDependencies
669 };
670
671 return createRenderPass(vkd, device, &renderPassInfo);
672 }
673
iterate(void)674 tcu::TestStatus MaxViewIndexInstance::iterate(void)
675 {
676 const auto &vkd = m_context.getDeviceInterface();
677 const auto device = m_context.getDevice();
678 auto &alloc = m_context.getDefaultAllocator();
679 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
680 const auto queue = m_context.getUniversalQueue();
681 const auto &meshProperties = m_context.getMeshShaderPropertiesEXT();
682 const auto maxViews = kMaxViews;
683 const auto numViews = std::min(meshProperties.maxMeshMultiviewViewCount, maxViews);
684 const auto viewType = ((numViews > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
685 const auto colorFormat = VK_FORMAT_R32_UINT;
686 const auto tcuColorFormat = mapVkFormat(colorFormat);
687 const auto pixelSize = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
688 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
689 const auto fbExtent = makeExtent3D(8u, 8u, 1u);
690 const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
691 static_cast<int>(numViews));
692 const tcu::UVec4 clearColor(0u, 0u, 0u, 0u);
693
694 // Create color attachment.
695 const VkImageCreateInfo colorAttachmentCreatInfo = {
696 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
697 nullptr, // const void* pNext;
698 0u, // VkImageCreateFlags flags;
699 VK_IMAGE_TYPE_2D, // VkImageType imageType;
700 colorFormat, // VkFormat format;
701 fbExtent, // VkExtent3D extent;
702 1u, // uint32_t mipLevels;
703 numViews, // uint32_t arrayLayers;
704 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
705 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
706 colorUsage, // VkImageUsageFlags usage;
707 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
708 0u, // uint32_t queueFamilyIndexCount;
709 nullptr, // const uint32_t* pQueueFamilyIndices;
710 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
711 };
712 ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
713 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numViews);
714 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, numViews);
715 const auto colorAttachmentView = makeImageView(vkd, device, colorAttachment.get(), viewType, colorFormat, colorSRR);
716
717 // Verification buffer for the color attachment.
718 DE_ASSERT(fbExtent.depth == 1u);
719 const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
720 const auto verificationBufferSize =
721 static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * numViews);
722 const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
723 BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
724 MemoryRequirement::HostVisible);
725 auto &verificationBufferAlloc = verificationBuffer.getAllocation();
726 void *verificationBufferData = verificationBufferAlloc.getHostPtr();
727
728 deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
729
730 const auto pipelineLayout = makePipelineLayout(vkd, device);
731 const auto renderPass = makeCustomRenderPass(vkd, device, numViews, colorFormat);
732 const auto framebuffer =
733 makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
734
735 const auto &binaries = m_context.getBinaryCollection();
736 const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
737 const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
738
739 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
740 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
741
742 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), DE_NULL, meshModule.get(),
743 fragModule.get(), renderPass.get(), viewports, scissors);
744
745 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
746 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
747 const auto cmdBuffer = cmdBufferPtr.get();
748
749 beginCommandBuffer(vkd, cmdBuffer);
750 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
751 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
752 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
753 endRenderPass(vkd, cmdBuffer);
754
755 const auto preTransferBarrier = makeImageMemoryBarrier(
756 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
757 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
758 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
759 VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
760
761 const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
762 vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
763 verificationBuffer.get(), 1u, ©Region);
764
765 const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
766 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
767 &postTransferBarrier);
768
769 endCommandBuffer(vkd, cmdBuffer);
770 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
771
772 invalidateAlloc(vkd, device, verificationBufferAlloc);
773 tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
774
775 for (int z = 0; z < iExtent3D.z(); ++z)
776 {
777 const tcu::UVec4 expectedPixel(static_cast<uint32_t>(z) + 1u, 0u, 0u, 1u);
778 for (int y = 0; y < iExtent3D.y(); ++y)
779 for (int x = 0; x < iExtent3D.x(); ++x)
780 {
781 const auto resultPixel = resultAccess.getPixelUint(x, y, z);
782 if (resultPixel != expectedPixel)
783 {
784 std::ostringstream msg;
785 msg << "Unexpected pixel value at layer " << z << ": (" << x << ", " << y << ") is " << resultPixel
786 << " while expecting " << expectedPixel;
787 TCU_FAIL(msg.str());
788 }
789 }
790 }
791
792 // QualityWarning if needed.
793 if (meshProperties.maxMeshMultiviewViewCount > maxViews)
794 {
795 const auto maxViewsStr = std::to_string(maxViews);
796 return tcu::TestStatus(QP_TEST_RESULT_QUALITY_WARNING,
797 "Test passed but maxMeshMultiviewViewCount greater than " + maxViewsStr);
798 }
799
800 return tcu::TestStatus::pass("Pass");
801 }
802
803 class MaxOutputLayersCase : public vkt::TestCase
804 {
805 public:
MaxOutputLayersCase(tcu::TestContext & testCtx,const std::string & name)806 MaxOutputLayersCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
807 {
808 }
~MaxOutputLayersCase(void)809 virtual ~MaxOutputLayersCase(void)
810 {
811 }
812
813 TestInstance *createInstance(Context &context) const override;
814 void checkSupport(Context &context) const override;
815 void initPrograms(vk::SourceCollections &programCollection) const override;
816 };
817
818 class MaxOutputLayersInstance : public vkt::TestInstance
819 {
820 public:
MaxOutputLayersInstance(Context & context)821 MaxOutputLayersInstance(Context &context) : vkt::TestInstance(context)
822 {
823 }
~MaxOutputLayersInstance(void)824 virtual ~MaxOutputLayersInstance(void)
825 {
826 }
827
828 tcu::TestStatus iterate(void) override;
829 };
830
createInstance(Context & context) const831 TestInstance *MaxOutputLayersCase::createInstance(Context &context) const
832 {
833 return new MaxOutputLayersInstance(context);
834 }
835
checkSupport(Context & context) const836 void MaxOutputLayersCase::checkSupport(Context &context) const
837 {
838 checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
839 }
840
initPrograms(vk::SourceCollections & programCollection) const841 void MaxOutputLayersCase::initPrograms(vk::SourceCollections &programCollection) const
842 {
843 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
844
845 std::ostringstream mesh;
846 mesh << "#version 450\n"
847 << "#extension GL_EXT_mesh_shader : enable\n"
848 << "\n"
849 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
850 << "layout (triangles) out;\n"
851 << "layout (max_vertices=3, max_primitives=1) out;\n"
852 << "\n"
853 << "void main (void) {\n"
854 << " SetMeshOutputsEXT(3u, 1u);\n"
855 << "\n"
856 << " gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
857 << " gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0, 3.0, 0.0, 1.0);\n"
858 << " gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
859 << "\n"
860 << " gl_MeshPrimitivesEXT[0].gl_Layer = int(gl_WorkGroupID.x);\n"
861 << " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
862 << "}\n";
863 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
864
865 std::ostringstream frag;
866 frag << "#version 450\n"
867 << "\n"
868 << "layout (location=0) out uvec4 outColor;\n"
869 << "\n"
870 << "void main (void) {\n"
871 << " outColor = uvec4(uint(gl_Layer) + 1u, 0, 0, 0);\n"
872 << "}\n";
873 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
874 }
875
iterate(void)876 tcu::TestStatus MaxOutputLayersInstance::iterate(void)
877 {
878 const auto &vki = m_context.getInstanceInterface();
879 const auto &physicalDevice = m_context.getPhysicalDevice();
880 const auto &vkd = m_context.getDeviceInterface();
881 const auto device = m_context.getDevice();
882 auto &alloc = m_context.getDefaultAllocator();
883 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
884 const auto queue = m_context.getUniversalQueue();
885 const auto fbFormat = VK_FORMAT_R32_UINT;
886 const auto imageType = VK_IMAGE_TYPE_2D;
887 const auto tiling = VK_IMAGE_TILING_OPTIMAL;
888 const auto usage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
889 const auto sampleCount = VK_SAMPLE_COUNT_1_BIT;
890 auto &log = m_context.getTestContext().getLog();
891
892 // Find out how many layers we can actually use.
893 const auto &properties = m_context.getDeviceProperties();
894 const auto &meshProperties = m_context.getMeshShaderPropertiesEXT();
895 const auto formatProperties =
896 getPhysicalDeviceImageFormatProperties(vki, physicalDevice, fbFormat, imageType, tiling, usage, 0u);
897 const auto layerCount = std::min({
898 properties.limits.maxFramebufferLayers,
899 meshProperties.maxMeshOutputLayers,
900 formatProperties.maxArrayLayers,
901 meshProperties.maxMeshWorkGroupCount[0],
902 });
903
904 // This is needed for iExtent3D below.
905 DE_ASSERT(static_cast<uint64_t>(std::numeric_limits<int>::max()) >= static_cast<uint64_t>(layerCount));
906 log << tcu::TestLog::Message << "Using " + std::to_string(layerCount) + " layers" << tcu::TestLog::EndMessage;
907
908 const auto viewType = ((layerCount > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
909 const auto tcuColorFormat = mapVkFormat(fbFormat);
910 const auto pixelSize = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
911 const auto fbExtent = makeExtent3D(1u, 1u, 1u);
912 const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
913 static_cast<int>(layerCount));
914 const tcu::UVec4 clearColor(0u, 0u, 0u, 0u);
915
916 // Create color attachment.
917 const VkImageCreateInfo colorAttachmentCreatInfo = {
918 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
919 nullptr, // const void* pNext;
920 0u, // VkImageCreateFlags flags;
921 imageType, // VkImageType imageType;
922 fbFormat, // VkFormat format;
923 fbExtent, // VkExtent3D extent;
924 1u, // uint32_t mipLevels;
925 layerCount, // uint32_t arrayLayers;
926 sampleCount, // VkSampleCountFlagBits samples;
927 tiling, // VkImageTiling tiling;
928 usage, // VkImageUsageFlags usage;
929 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
930 0u, // uint32_t queueFamilyIndexCount;
931 nullptr, // const uint32_t* pQueueFamilyIndices;
932 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
933 };
934 ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
935 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, layerCount);
936 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, layerCount);
937 const auto colorAttachmentView = makeImageView(vkd, device, colorAttachment.get(), viewType, fbFormat, colorSRR);
938
939 // Verification buffer for the color attachment.
940 DE_ASSERT(fbExtent.depth == 1u);
941 const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
942 const auto verificationBufferSize =
943 static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * layerCount);
944 const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
945 BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
946 MemoryRequirement::HostVisible);
947 auto &verificationBufferAlloc = verificationBuffer.getAllocation();
948 void *verificationBufferData = verificationBufferAlloc.getHostPtr();
949
950 deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
951
952 const auto pipelineLayout = makePipelineLayout(vkd, device);
953 const auto renderPass = makeRenderPass(vkd, device, fbFormat);
954 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width,
955 fbExtent.height, layerCount);
956
957 const auto &binaries = m_context.getBinaryCollection();
958 const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
959 const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
960
961 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
962 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
963
964 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), DE_NULL, meshModule.get(),
965 fragModule.get(), renderPass.get(), viewports, scissors);
966
967 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
968 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
969 const auto cmdBuffer = cmdBufferPtr.get();
970
971 beginCommandBuffer(vkd, cmdBuffer);
972 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
973 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
974 vkd.cmdDrawMeshTasksEXT(cmdBuffer, layerCount, 1u, 1u);
975 endRenderPass(vkd, cmdBuffer);
976
977 const auto preTransferBarrier = makeImageMemoryBarrier(
978 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
979 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
980 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
981 VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
982
983 const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
984 vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
985 verificationBuffer.get(), 1u, ©Region);
986
987 const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
988 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
989 &postTransferBarrier);
990
991 endCommandBuffer(vkd, cmdBuffer);
992 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
993
994 invalidateAlloc(vkd, device, verificationBufferAlloc);
995 tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
996
997 for (int z = 0; z < iExtent3D.z(); ++z)
998 {
999 const tcu::UVec4 expectedPixel(static_cast<uint32_t>(z) + 1u, 0u, 0u, 1u);
1000 for (int y = 0; y < iExtent3D.y(); ++y)
1001 for (int x = 0; x < iExtent3D.x(); ++x)
1002 {
1003 const auto resultPixel = resultAccess.getPixelUint(x, y, z);
1004 if (resultPixel != expectedPixel)
1005 {
1006 std::ostringstream msg;
1007 msg << "Unexpected pixel value at layer " << z << ": (" << x << ", " << y << ") is " << resultPixel
1008 << " while expecting " << expectedPixel;
1009 TCU_FAIL(msg.str());
1010 }
1011 }
1012 }
1013
1014 return tcu::TestStatus::pass("Pass");
1015 }
1016
// Selects which mesh output limit a test variant exercises.
enum class MaxPrimVertType
{
    PRIMITIVES = 0, // Test the number of output primitives.
    VERTICES,       // Test the number of output vertices.
};
1022
1023 struct MaxPrimVertParams
1024 {
1025 MaxPrimVertType testType;
1026 uint32_t itemCount;
1027 };
1028
1029 class MaxMeshOutputPrimVertCase : public vkt::TestCase
1030 {
1031 public:
MaxMeshOutputPrimVertCase(tcu::TestContext & testCtx,const std::string & name,const MaxPrimVertParams & params)1032 MaxMeshOutputPrimVertCase(tcu::TestContext &testCtx, const std::string &name, const MaxPrimVertParams ¶ms)
1033 : vkt::TestCase(testCtx, name)
1034 , m_params(params)
1035 {
1036 }
~MaxMeshOutputPrimVertCase(void)1037 virtual ~MaxMeshOutputPrimVertCase(void)
1038 {
1039 }
1040
1041 void initPrograms(vk::SourceCollections &programCollection) const override;
1042 TestInstance *createInstance(Context &context) const override;
1043 void checkSupport(Context &context) const override;
1044
1045 protected:
1046 static constexpr uint32_t kLocalInvocations = 128u;
1047
1048 const MaxPrimVertParams m_params;
1049 };
1050
1051 class MaxMeshOutputPrimVertInstance : public vkt::TestInstance
1052 {
1053 public:
MaxMeshOutputPrimVertInstance(Context & context,uint32_t shaderPrimitives,uint32_t fbWidth)1054 MaxMeshOutputPrimVertInstance(Context &context, uint32_t shaderPrimitives, uint32_t fbWidth)
1055 : vkt::TestInstance(context)
1056 , m_shaderPrimitives(shaderPrimitives)
1057 , m_fbWidth(fbWidth)
1058 {
1059 DE_ASSERT(m_shaderPrimitives > 0u);
1060 DE_ASSERT(m_fbWidth > 0u);
1061 }
~MaxMeshOutputPrimVertInstance(void)1062 virtual ~MaxMeshOutputPrimVertInstance(void)
1063 {
1064 }
1065
1066 tcu::TestStatus iterate(void) override;
1067
1068 protected:
1069 const uint32_t m_shaderPrimitives;
1070 const uint32_t m_fbWidth;
1071 };
1072
createInstance(Context & context) const1073 TestInstance *MaxMeshOutputPrimVertCase::createInstance(Context &context) const
1074 {
1075 const auto fbWidth = ((m_params.testType == MaxPrimVertType::PRIMITIVES) ? 1u : m_params.itemCount);
1076 return new MaxMeshOutputPrimVertInstance(context, m_params.itemCount, fbWidth);
1077 }
1078
checkSupport(Context & context) const1079 void MaxMeshOutputPrimVertCase::checkSupport(Context &context) const
1080 {
1081 checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
1082 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
1083
1084 // Note when testing vertices, due to our usage of points as the primitive type, we are also limited by the number of primitives.
1085
1086 const auto isVertices = (m_params.testType == MaxPrimVertType::VERTICES);
1087 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1088 const auto &itemLimit = isVertices ?
1089 std::min(meshProperties.maxMeshOutputVertices, meshProperties.maxMeshOutputPrimitives) :
1090 meshProperties.maxMeshOutputPrimitives;
1091
1092 if (m_params.itemCount > itemLimit)
1093 TCU_THROW(NotSupportedError, "Implementation does not support the given amount of items");
1094
1095 // Check memory limits just in case.
1096 uint32_t totalBytes = 0u;
1097 const auto perVertexBytes =
1098 static_cast<uint32_t>(sizeof(tcu::Vec4) + sizeof(float)); // gl_Position and gl_PointSize
1099
1100 if (isVertices)
1101 {
1102 // No per-primitive data in this variant.
1103 const auto actualVertices = de::roundUp(m_params.itemCount, meshProperties.meshOutputPerVertexGranularity);
1104
1105 totalBytes = perVertexBytes * actualVertices;
1106 }
1107 else
1108 {
1109 // Single vertex, but using gl_PrimitiveID in each primitive.
1110 const auto perPrimitiveBytes = static_cast<uint32_t>(sizeof(uint32_t)); // gl_PrimitiveID
1111 const auto actualVertices = de::roundUp(1u, meshProperties.meshOutputPerVertexGranularity);
1112 const auto actualPrimitives = de::roundUp(m_params.itemCount, meshProperties.meshOutputPerPrimitiveGranularity);
1113
1114 totalBytes = perVertexBytes * actualVertices + perPrimitiveBytes * actualPrimitives;
1115 }
1116
1117 if (totalBytes > meshProperties.maxMeshOutputMemorySize)
1118 TCU_THROW(NotSupportedError, "Not enough output memory for this test");
1119 }
1120
initPrograms(vk::SourceCollections & programCollection) const1121 void MaxMeshOutputPrimVertCase::initPrograms(vk::SourceCollections &programCollection) const
1122 {
1123 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1124 const bool isPrimitives = (m_params.testType == MaxPrimVertType::PRIMITIVES);
1125 const auto associatedVertex = (isPrimitives ? "0u" : "primitiveID");
1126 const auto maxVertices = (isPrimitives ? 1u : m_params.itemCount);
1127 const auto ssboIndex = (isPrimitives ? "gl_PrimitiveID" : "uint(gl_FragCoord.x)");
1128 const auto xCoord = (isPrimitives ? "0.0" : "(float(vertexID) + 0.5) / float(maxVertices) * 2.0 - 1.0");
1129 const auto maxPrimitives = m_params.itemCount;
1130
1131 // When testing vertices, we'll use a wide framebuffer, emit one vertex per pixel and use the fragment coords to index into the
1132 // SSBO. When testing primitives, we'll use a 1x1 framebuffer, emit one single vertex in the center and use the primitive id to
1133 // index into the SSBO.
1134 std::ostringstream frag;
1135 frag << "#version 450\n"
1136 << "\n"
1137 << "layout (set=0, binding=0, std430) buffer OutputBlock {\n"
1138 << " uint flags[];\n"
1139 << "} ssbo;\n"
1140 << "\n"
1141 << "void main (void) {\n"
1142 << " ssbo.flags[" << ssboIndex << "] = 1u;\n"
1143 << "}\n";
1144 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1145
1146 std::ostringstream mesh;
1147 mesh << "#version 450\n"
1148 << "#extension GL_EXT_mesh_shader : enable\n"
1149 << "\n"
1150 << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1151 << "layout (points) out;\n"
1152 << "layout (max_vertices=" << maxVertices << ", max_primitives=" << maxPrimitives << ") out;\n"
1153 << "\n"
1154 << "out gl_MeshPerVertexEXT {\n"
1155 << " vec4 gl_Position;\n"
1156 << " float gl_PointSize;\n"
1157 << "} gl_MeshVerticesEXT[];\n"
1158 << "\n";
1159
1160 if (isPrimitives)
1161 {
1162 mesh << "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
1163 << " int gl_PrimitiveID;\n"
1164 << "} gl_MeshPrimitivesEXT[];\n"
1165 << "\n";
1166 }
1167
1168 mesh << "void main (void) {\n"
1169 << " const uint localInvs = " << kLocalInvocations << "u;\n"
1170 << " const uint maxVertices = " << maxVertices << "u;\n"
1171 << " const uint maxPoints = " << maxPrimitives << "u;\n"
1172 << " const uint verticesPerInvocation = (maxVertices + localInvs - 1u) / localInvs;\n"
1173 << " const uint primitivesPerInvocation = (maxPoints + localInvs - 1u) / localInvs;\n"
1174 << "\n"
1175 << " SetMeshOutputsEXT(maxVertices, maxPoints);\n"
1176 << "\n"
1177 << " for (uint i = 0u; i < verticesPerInvocation; ++i) {\n"
1178 << " const uint vertexID = gl_LocalInvocationIndex * verticesPerInvocation + i;\n"
1179 << " if (vertexID >= maxVertices) {\n"
1180 << " break;\n"
1181 << " }\n"
1182 << " const float xCoord = " << xCoord << ";\n"
1183 << " gl_MeshVerticesEXT[vertexID].gl_Position = vec4(xCoord, 0.0, 0.0, 1.0);\n"
1184 << " gl_MeshVerticesEXT[vertexID].gl_PointSize = 1.0f;\n"
1185 << " }\n"
1186 << "\n"
1187 << " for (uint i = 0u; i < primitivesPerInvocation; ++i) {\n"
1188 << " const uint primitiveID = gl_LocalInvocationIndex * primitivesPerInvocation + i;\n"
1189 << " if (primitiveID >= maxPoints) {\n"
1190 << " break;\n"
1191 << " }\n"
1192 << (isPrimitives ? " gl_MeshPrimitivesEXT[primitiveID].gl_PrimitiveID = int(primitiveID);\n" : "")
1193 << " gl_PrimitivePointIndicesEXT[primitiveID] = " << associatedVertex << ";\n"
1194 << " }\n"
1195 << "}\n";
1196 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1197 }
1198
iterate(void)1199 tcu::TestStatus MaxMeshOutputPrimVertInstance::iterate(void)
1200 {
1201 const auto &vkd = m_context.getDeviceInterface();
1202 const auto device = m_context.getDevice();
1203 auto &alloc = m_context.getDefaultAllocator();
1204 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
1205 const auto queue = m_context.getUniversalQueue();
1206 const auto fbExtent = makeExtent2D(m_fbWidth, 1u);
1207 const auto bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
1208
1209 const auto ssboSize = static_cast<VkDeviceSize>(sizeof(uint32_t) * m_shaderPrimitives);
1210 const auto ssboUsage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
1211 const auto ssboDescType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1212
1213 const auto ssboCreateInfo = makeBufferCreateInfo(ssboSize, ssboUsage);
1214 BufferWithMemory ssbo(vkd, device, alloc, ssboCreateInfo, MemoryRequirement::HostVisible);
1215 auto &ssboAlloc = ssbo.getAllocation();
1216 void *ssboData = ssboAlloc.getHostPtr();
1217 const auto ssboDescInfo = makeDescriptorBufferInfo(ssbo.get(), 0ull, ssboSize);
1218
1219 // Zero-out SSBO.
1220 deMemset(ssboData, 0, static_cast<size_t>(ssboSize));
1221 flushAlloc(vkd, device, ssboAlloc);
1222
1223 // Descriptor set layout, pool, set and set update.
1224 DescriptorSetLayoutBuilder setLayoutBuilder;
1225 setLayoutBuilder.addSingleBinding(ssboDescType, VK_SHADER_STAGE_FRAGMENT_BIT);
1226 const auto setLayout = setLayoutBuilder.build(vkd, device);
1227
1228 DescriptorPoolBuilder poolBuilder;
1229 poolBuilder.addType(ssboDescType);
1230 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1231 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1232
1233 DescriptorSetUpdateBuilder updateBuilder;
1234 updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), ssboDescType,
1235 &ssboDescInfo);
1236 updateBuilder.update(vkd, device);
1237
1238 // Pipeline layout, render pass and pipeline.
1239 const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
1240 const auto renderPass = makeRenderPass(vkd, device);
1241 const auto framebuffer =
1242 makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
1243
1244 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
1245 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
1246
1247 const auto &binaries = m_context.getBinaryCollection();
1248 const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
1249 const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
1250 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), DE_NULL, meshShader.get(),
1251 fragShader.get(), renderPass.get(), viewports, scissors);
1252
1253 // Command pool and buffer.
1254 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
1255 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1256 const auto cmdBuffer = cmdBufferPtr.get();
1257
1258 beginCommandBuffer(vkd, cmdBuffer);
1259 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
1260 vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
1261 vkd.cmdBindPipeline(cmdBuffer, bindPoint, pipeline.get());
1262 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1263 endRenderPass(vkd, cmdBuffer);
1264 endCommandBuffer(vkd, cmdBuffer);
1265 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1266
1267 invalidateAlloc(vkd, device, ssboAlloc);
1268 std::vector<uint32_t> outputFlags(m_shaderPrimitives, 0u);
1269 deMemcpy(outputFlags.data(), ssboData, de::dataSize(outputFlags));
1270
1271 // Verify output SSBO.
1272 bool pass = true;
1273 auto &log = m_context.getTestContext().getLog();
1274
1275 for (size_t i = 0u; i < outputFlags.size(); ++i)
1276 {
1277 if (outputFlags[i] != 1u)
1278 {
1279 std::ostringstream msg;
1280 msg << "Primitive ID " << i << " flag != 1: " << outputFlags[i];
1281 log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
1282 pass = false;
1283 }
1284 }
1285
1286 if (!pass)
1287 TCU_FAIL("Check log for details");
1288
1289 return tcu::TestStatus::pass("Pass");
1290 }
1291
1292 class MaxMeshOutputComponentsCase : public vkt::TestCase
1293 {
1294 public:
MaxMeshOutputComponentsCase(tcu::TestContext & testCtx,const std::string & name)1295 MaxMeshOutputComponentsCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
1296 {
1297 }
1298
~MaxMeshOutputComponentsCase(void)1299 virtual ~MaxMeshOutputComponentsCase(void)
1300 {
1301 }
1302
1303 void initPrograms(vk::SourceCollections &programCollection) const override;
1304 TestInstance *createInstance(Context &context) const override;
1305 void checkSupport(Context &context) const override;
1306
1307 protected:
1308 struct ParamsFromContext
1309 {
1310 uint32_t maxLocations;
1311 };
1312 ParamsFromContext getParamsFromContext(Context &context) const;
1313 };
1314
1315 class MaxMeshOutputComponentsInstance : public SpecConstantInstance
1316 {
1317 public:
MaxMeshOutputComponentsInstance(Context & context,SpecConstVector && scVector)1318 MaxMeshOutputComponentsInstance(Context &context, SpecConstVector &&scVector)
1319 : SpecConstantInstance(context, std::move(scVector))
1320 {
1321 }
1322
~MaxMeshOutputComponentsInstance(void)1323 virtual ~MaxMeshOutputComponentsInstance(void)
1324 {
1325 }
1326
1327 tcu::TestStatus iterate(void) override;
1328 };
1329
getParamsFromContext(Context & context) const1330 MaxMeshOutputComponentsCase::ParamsFromContext MaxMeshOutputComponentsCase::getParamsFromContext(Context &context) const
1331 {
1332 const uint32_t kLocationComponents =
1333 4u; // Each location can handle up to 4 32-bit components (and we'll be using uvec4).
1334 const uint32_t kUsedLocations = 1u; // For gl_Position.
1335 const uint32_t maxLocations =
1336 context.getMeshShaderPropertiesEXT().maxMeshOutputComponents / kLocationComponents - kUsedLocations;
1337
1338 ParamsFromContext params{maxLocations};
1339 return params;
1340 }
1341
checkSupport(Context & context) const1342 void MaxMeshOutputComponentsCase::checkSupport(Context &context) const
1343 {
1344 checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
1345 }
1346
createInstance(Context & context) const1347 TestInstance *MaxMeshOutputComponentsCase::createInstance(Context &context) const
1348 {
1349 const auto ctxParams = getParamsFromContext(context);
1350 SpecConstVector specConstVec{ctxParams.maxLocations};
1351
1352 return new MaxMeshOutputComponentsInstance(context, std::move(specConstVec));
1353 }
1354
initPrograms(vk::SourceCollections & programCollection) const1355 void MaxMeshOutputComponentsCase::initPrograms(vk::SourceCollections &programCollection) const
1356 {
1357 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1358
1359 const std::string locationStructDecl = "layout (constant_id=0) const uint maxLocations = 1u;\n"
1360 "struct LocationStruct {\n"
1361 " uvec4 location_var[maxLocations];\n"
1362 "};\n";
1363
1364 const std::string declOut =
1365 locationStructDecl + "layout (location=0) perprimitiveEXT flat out LocationStruct ls[];\n";
1366
1367 const std::string declIn = locationStructDecl + "layout (location=0) perprimitiveEXT flat in LocationStruct ls;\n";
1368
1369 std::ostringstream mesh;
1370 mesh << "#version 450\n"
1371 << "#extension GL_EXT_mesh_shader : enable\n"
1372 << "\n"
1373 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
1374 << "layout (triangles) out;\n"
1375 << "layout (max_vertices=3, max_primitives=1) out;\n"
1376 << "\n"
1377 << "out gl_MeshPerVertexEXT {\n"
1378 << " vec4 gl_Position;\n"
1379 << "} gl_MeshVerticesEXT[];\n"
1380 << "\n"
1381 << declOut << "\n"
1382 << "void main (void) {\n"
1383 << " SetMeshOutputsEXT(3u, 1u);\n"
1384 << " gl_MeshVerticesEXT[0].gl_Position = vec4( 0.0, -0.5, 0.0, 1.0);\n"
1385 << " gl_MeshVerticesEXT[1].gl_Position = vec4(-0.5, 0.5, 0.0, 1.0);\n"
1386 << " gl_MeshVerticesEXT[2].gl_Position = vec4( 0.5, 0.5, 0.0, 1.0);\n"
1387 << " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
1388 << "\n"
1389 << " for (uint i = 0u; i < maxLocations; ++i) {\n"
1390 << " const uint baseVal = 10000u * (i + 1u);\n"
1391 << " const uvec4 expectedValue = uvec4(baseVal + 1u, baseVal + 2u, baseVal + 3u, baseVal + 4u);\n"
1392 << " ls[0].location_var[i] = expectedValue;\n"
1393 << " }\n"
1394 << "}\n";
1395 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1396
1397 std::ostringstream frag;
1398 frag << "#version 450\n"
1399 << "#extension GL_EXT_mesh_shader : enable\n"
1400 << "\n"
1401 << "layout (location=0) out vec4 outColor;\n"
1402 << "\n"
1403 << declIn << "\n"
1404 << "void main (void) {\n"
1405 << " bool success = true;\n"
1406 << " for (uint i = 0u; i < maxLocations; ++i) {\n"
1407 << " const uint baseVal = 10000u * (i + 1u);\n"
1408 << " const uvec4 expectedValue = uvec4(baseVal + 1u, baseVal + 2u, baseVal + 3u, baseVal + 4u);\n"
1409 << " success = success && (ls.location_var[i] == expectedValue);\n"
1410 << " }\n"
1411 << " outColor = (success ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
1412 << "}\n";
1413 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
1414 }
1415
iterate(void)1416 tcu::TestStatus MaxMeshOutputComponentsInstance::iterate(void)
1417 {
1418 const auto &vkd = m_context.getDeviceInterface();
1419 const auto device = m_context.getDevice();
1420 auto &alloc = m_context.getDefaultAllocator();
1421 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
1422 const auto queue = m_context.getUniversalQueue();
1423
1424 const auto colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
1425 const auto tcuColorFormat = mapVkFormat(colorFormat);
1426 const auto pixelSize = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
1427 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1428 const auto fbExtent = makeExtent3D(1u, 1u, 1u);
1429 const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
1430 static_cast<int>(fbExtent.depth));
1431 const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 1.0f);
1432 const tcu::Vec4 expectedColor(0.0f, 0.0f, 1.0f, 1.0f);
1433 const tcu::Vec4 colorThreshold(0.0f, 0.0f, 0.0f, 0.0f);
1434
1435 // Create color attachment.
1436 const VkImageCreateInfo colorAttachmentCreatInfo = {
1437 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1438 nullptr, // const void* pNext;
1439 0u, // VkImageCreateFlags flags;
1440 VK_IMAGE_TYPE_2D, // VkImageType imageType;
1441 colorFormat, // VkFormat format;
1442 fbExtent, // VkExtent3D extent;
1443 1u, // uint32_t mipLevels;
1444 1u, // uint32_t arrayLayers;
1445 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1446 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1447 colorUsage, // VkImageUsageFlags usage;
1448 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1449 0u, // uint32_t queueFamilyIndexCount;
1450 nullptr, // const uint32_t* pQueueFamilyIndices;
1451 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1452 };
1453 ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
1454 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1455 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1456 const auto colorAttachmentView =
1457 makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
1458
1459 // Verification buffer for the color attachment.
1460 DE_ASSERT(fbExtent.depth == 1u);
1461 const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1462 const auto verificationBufferSize =
1463 static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * fbExtent.depth);
1464 const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
1465 BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
1466 MemoryRequirement::HostVisible);
1467 auto &verificationBufferAlloc = verificationBuffer.getAllocation();
1468 void *verificationBufferData = verificationBufferAlloc.getHostPtr();
1469
1470 deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
1471
1472 const auto pipelineLayout = makePipelineLayout(vkd, device);
1473 const auto renderPass = makeRenderPass(vkd, device, colorFormat);
1474 const auto framebuffer =
1475 makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
1476
1477 const auto &binaries = m_context.getBinaryCollection();
1478 const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
1479 const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
1480
1481 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
1482 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
1483
1484 const auto specMap = makeSpecializationMap();
1485 const VkSpecializationInfo specInfo = {
1486 static_cast<uint32_t>(specMap.size()), // uint32_t mapEntryCount;
1487 de::dataOrNull(specMap), // const VkSpecializationMapEntry* pMapEntries;
1488 de::dataSize(m_specConstants), // size_t dataSize;
1489 de::dataOrNull(m_specConstants), // const void* pData;
1490 };
1491
1492 std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
1493 VkPipelineShaderStageCreateInfo stageInfo = {
1494 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1495 nullptr, // const void* pNext;
1496 0u, // VkPipelineShaderStageCreateFlags flags;
1497 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
1498 DE_NULL, // VkShaderModule module;
1499 "main", // const char* pName;
1500 &specInfo, // const VkSpecializationInfo* pSpecializationInfo;
1501 };
1502
1503 {
1504 stageInfo.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
1505 stageInfo.module = meshModule.get();
1506 shaderStages.push_back(stageInfo);
1507 }
1508
1509 {
1510 stageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
1511 stageInfo.module = fragModule.get();
1512 shaderStages.push_back(stageInfo);
1513 }
1514
1515 const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
1516 renderPass.get(), viewports, scissors);
1517
1518 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
1519 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1520 const auto cmdBuffer = cmdBufferPtr.get();
1521
1522 beginCommandBuffer(vkd, cmdBuffer);
1523 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
1524 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
1525 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1526 endRenderPass(vkd, cmdBuffer);
1527
1528 const auto preTransferBarrier = makeImageMemoryBarrier(
1529 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1530 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
1531 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1532 VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
1533
1534 const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
1535 vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1536 verificationBuffer.get(), 1u, ©Region);
1537
1538 const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1539 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1540 &postTransferBarrier);
1541
1542 endCommandBuffer(vkd, cmdBuffer);
1543 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1544
1545 invalidateAlloc(vkd, device, verificationBufferAlloc);
1546 tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
1547
1548 auto &log = m_context.getTestContext().getLog();
1549 log << tcu::TestLog::Message << "maxLocations value: " << m_specConstants.at(0u) << tcu::TestLog::EndMessage;
1550 if (!tcu::floatThresholdCompare(log, "Result", "", expectedColor, resultAccess, colorThreshold,
1551 tcu::COMPARE_LOG_ON_ERROR))
1552 TCU_FAIL("Check log for details");
1553
1554 return tcu::TestStatus::pass("Pass");
1555 }
1556
1557 class MeshPayloadShMemSizeCase : public vkt::TestCase
1558 {
1559 public:
MeshPayloadShMemSizeCase(tcu::TestContext & testCtx,const std::string & name,const MeshPayloadShMemSizeParams & params)1560 MeshPayloadShMemSizeCase(tcu::TestContext &testCtx, const std::string &name,
1561 const MeshPayloadShMemSizeParams ¶ms)
1562 : vkt::TestCase(testCtx, name)
1563 , m_params(params)
1564 {
1565 }
~MeshPayloadShMemSizeCase(void)1566 virtual ~MeshPayloadShMemSizeCase(void)
1567 {
1568 }
1569
1570 void checkSupport(Context &context) const override;
1571 void initPrograms(vk::SourceCollections &programCollection) const override;
1572 TestInstance *createInstance(Context &context) const override;
1573
1574 protected:
1575 struct ParamsFromContext
1576 {
1577 uint32_t payloadElements;
1578 uint32_t sharedMemoryElements;
1579 };
1580 ParamsFromContext getParamsFromContext(Context &context) const;
1581
1582 const MeshPayloadShMemSizeParams m_params;
1583
1584 static constexpr uint32_t kElementSize = static_cast<uint32_t>(sizeof(uint32_t));
1585 static constexpr uint32_t kLocalInvocations = 128u;
1586 };
1587
checkSupport(Context & context) const1588 void MeshPayloadShMemSizeCase::checkSupport(Context &context) const
1589 {
1590 const bool requireTask = m_params.hasPayload();
1591
1592 checkTaskMeshShaderSupportEXT(context, requireTask, true /*requireMesh*/);
1593 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1594
1595 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1596 const auto minSize = kLocalInvocations * kElementSize;
1597
1598 // Note: the min required values for these properties in the spec would pass these checks.
1599
1600 if (requireTask)
1601 {
1602 if (meshProperties.maxTaskPayloadSize < minSize)
1603 TCU_FAIL("Invalid maxTaskPayloadSize");
1604
1605 if (meshProperties.maxTaskPayloadAndSharedMemorySize < minSize)
1606 TCU_FAIL("Invalid maxTaskPayloadAndSharedMemorySize");
1607 }
1608
1609 if (meshProperties.maxMeshSharedMemorySize < minSize)
1610 TCU_FAIL("Invalid maxMeshSharedMemorySize");
1611
1612 if (meshProperties.maxMeshPayloadAndSharedMemorySize < minSize)
1613 TCU_FAIL("Invalid maxMeshPayloadAndSharedMemorySize");
1614
1615 if (meshProperties.maxMeshPayloadAndOutputMemorySize < minSize)
1616 TCU_FAIL("Invalid maxMeshPayloadAndOutputMemorySize");
1617 }
1618
getParamsFromContext(Context & context) const1619 MeshPayloadShMemSizeCase::ParamsFromContext MeshPayloadShMemSizeCase::getParamsFromContext(Context &context) const
1620 {
1621 ParamsFromContext params;
1622
1623 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1624 const auto maxTaskPayloadSize =
1625 std::min(meshProperties.maxTaskPayloadAndSharedMemorySize, meshProperties.maxTaskPayloadSize);
1626 const auto maxMeshPayloadSize =
1627 std::min(meshProperties.maxMeshPayloadAndOutputMemorySize, meshProperties.maxMeshPayloadAndSharedMemorySize);
1628 const auto maxPayloadElements = std::min(maxTaskPayloadSize, maxMeshPayloadSize) / kElementSize;
1629 const auto maxShMemElements = meshProperties.maxMeshSharedMemorySize / kElementSize;
1630 const auto maxTotalElements = meshProperties.maxTaskPayloadAndSharedMemorySize / kElementSize;
1631
1632 if (m_params.testType == PayLoadShMemSizeType::PAYLOAD)
1633 {
1634 params.sharedMemoryElements = 0u;
1635 params.payloadElements = std::min(maxTotalElements, maxPayloadElements);
1636 }
1637 else if (m_params.testType == PayLoadShMemSizeType::SHARED_MEMORY)
1638 {
1639 params.payloadElements = 0u;
1640 params.sharedMemoryElements = std::min(maxTotalElements, maxShMemElements);
1641 }
1642 else
1643 {
1644 uint32_t *minPtr;
1645 uint32_t minVal;
1646 uint32_t *maxPtr;
1647 uint32_t maxVal;
1648
1649 // Divide them as evenly as possible getting them as closest as possible to maxTotalElements.
1650 if (maxPayloadElements < maxShMemElements)
1651 {
1652 minPtr = ¶ms.payloadElements;
1653 minVal = maxPayloadElements;
1654
1655 maxPtr = ¶ms.sharedMemoryElements;
1656 maxVal = maxShMemElements;
1657 }
1658 else
1659 {
1660 minPtr = ¶ms.sharedMemoryElements;
1661 minVal = maxShMemElements;
1662
1663 maxPtr = ¶ms.payloadElements;
1664 maxVal = maxPayloadElements;
1665 }
1666
1667 *minPtr = std::min(minVal, maxTotalElements / 2u);
1668 *maxPtr = std::min(maxTotalElements - (*minPtr), maxVal);
1669 }
1670
1671 return params;
1672 }
1673
createInstance(Context & context) const1674 TestInstance *MeshPayloadShMemSizeCase::createInstance(Context &context) const
1675 {
1676 const auto ctxParams = getParamsFromContext(context);
1677 SpecConstVector vec{ctxParams.payloadElements, ctxParams.sharedMemoryElements};
1678
1679 return new PayloadShMemSizeInstance(context, m_params, std::move(vec));
1680 }
1681
initPrograms(vk::SourceCollections & programCollection) const1682 void MeshPayloadShMemSizeCase::initPrograms(vk::SourceCollections &programCollection) const
1683 {
1684 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1685
1686 const std::string scDecl = "layout (constant_id=0) const uint payloadElements = 1u;\n"
1687 "layout (constant_id=1) const uint sharedMemoryElements = 1u;\n";
1688
1689 const std::string dsDecl = "layout (set=0, binding=0, std430) buffer ResultBlock {\n"
1690 " uint sharedOK;\n"
1691 " uint payloadOK;\n"
1692 "} result;\n";
1693
1694 std::string taskData;
1695 std::string taskPayloadBody;
1696 std::string meshPayloadBody;
1697
1698 if (m_params.hasPayload())
1699 {
1700 std::ostringstream taskDataStream;
1701 taskDataStream << "struct TaskData {\n"
1702 << " uint elements[payloadElements];\n"
1703 << "};\n"
1704 << "taskPayloadSharedEXT TaskData td;\n";
1705 taskData = taskDataStream.str();
1706
1707 std::ostringstream taskBodyStream;
1708 taskBodyStream << " const uint payloadElementsPerInvocation = uint(ceil(float(payloadElements) / float("
1709 << kLocalInvocations << ")));\n"
1710 << " for (uint i = 0u; i < payloadElementsPerInvocation; ++i) {\n"
1711 << " const uint elemIdx = payloadElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1712 << " if (elemIdx < payloadElements) {\n"
1713 << " td.elements[elemIdx] = elemIdx + 2000u;\n"
1714 << " }\n"
1715 << " }\n"
1716 << "\n";
1717 taskPayloadBody = taskBodyStream.str();
1718
1719 std::ostringstream meshBodyStream;
1720 meshBodyStream << " if (gl_LocalInvocationIndex == 0u) {\n"
1721 << " bool allOK = true;\n"
1722 << " for (uint i = 0u; i < payloadElements; ++i) {\n"
1723 << " if (td.elements[i] != i + 2000u) {\n"
1724 << " allOK = false;\n"
1725 << " break;\n"
1726 << " }\n"
1727 << " }\n"
1728 << " result.payloadOK = (allOK ? 1u : 0u);\n"
1729 << " }\n"
1730 << "\n";
1731 meshPayloadBody = meshBodyStream.str();
1732 }
1733 else
1734 {
1735 meshPayloadBody = " result.payloadOK = 1u;\n";
1736 }
1737
1738 std::string sharedData;
1739 std::string meshSharedDataBody;
1740
1741 if (m_params.hasSharedMemory())
1742 {
1743 sharedData = "shared uint sharedElements[sharedMemoryElements];\n";
1744
1745 std::ostringstream bodyStream;
1746 bodyStream << " const uint shMemElementsPerInvocation = uint(ceil(float(sharedMemoryElements) / float("
1747 << kLocalInvocations << ")));\n"
1748 << " for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
1749 << " const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1750 << " if (elemIdx < sharedMemoryElements) {\n"
1751 << " sharedElements[elemIdx] = elemIdx * 2u + 1000u;\n" // Write
1752 << " }\n"
1753 << " }\n"
1754 << " memoryBarrierShared();\n"
1755 << " barrier();\n"
1756 << " for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
1757 << " const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1758 << " if (elemIdx < sharedMemoryElements) {\n"
1759 << " const uint accessIdx = sharedMemoryElements - 1u - elemIdx;\n"
1760 << " sharedElements[accessIdx] += accessIdx;\n" // Read+Write a different element.
1761 << " }\n"
1762 << " }\n"
1763 << " memoryBarrierShared();\n"
1764 << " barrier();\n"
1765 << " if (gl_LocalInvocationIndex == 0u) {\n"
1766 << " bool allOK = true;\n"
1767 << " for (uint i = 0u; i < sharedMemoryElements; ++i) {\n"
1768 << " if (sharedElements[i] != i*3u + 1000u) {\n"
1769 << " allOK = false;\n"
1770 << " break;\n"
1771 << " }\n"
1772 << " }\n"
1773 << " result.sharedOK = (allOK ? 1u : 0u);\n"
1774 << " }\n"
1775 << "\n";
1776 meshSharedDataBody = bodyStream.str();
1777 }
1778 else
1779 {
1780 meshSharedDataBody = " if (gl_LocalInvocationIndex == 0u) {\n"
1781 " result.sharedOK = 1u;\n"
1782 " }\n";
1783 }
1784
1785 if (m_params.hasPayload())
1786 {
1787 std::ostringstream task;
1788 task << "#version 450\n"
1789 << "#extension GL_EXT_mesh_shader : enable\n"
1790 << "\n"
1791 << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1792 << scDecl << dsDecl << taskData << "\n"
1793 << "void main () {\n"
1794 << taskPayloadBody << " EmitMeshTasksEXT(1u, 1u, 1u);\n"
1795 << "}\n";
1796 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1797 }
1798
1799 std::ostringstream mesh;
1800 mesh << "#version 450\n"
1801 << "#extension GL_EXT_mesh_shader : enable\n"
1802 << "\n"
1803 << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1804 << "layout (triangles) out;\n"
1805 << "layout (max_vertices=3, max_primitives=1) out;\n"
1806 << scDecl << dsDecl << taskData << sharedData << "\n"
1807 << "void main () {\n"
1808 << meshSharedDataBody << meshPayloadBody << " SetMeshOutputsEXT(0u, 0u);\n"
1809 << "}\n";
1810 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1811 }
1812
// Kind of user-defined mesh shader output locations exercised by the max output size tests.
enum class LocationType
{
    PER_VERTEX = 0, // Plain per-vertex outputs.
    PER_PRIMITIVE,  // Outputs declared with the perprimitiveEXT qualifier.
};
1818
// Selects whether the max output size tests use multiview and which shaders read gl_ViewIndex.
enum class ViewIndexType
{
    NO_VIEW_INDEX = 0, // Multiview is not used.
    VIEW_INDEX_FRAG,   // Multiview; only the fragment shader uses the view index.
    VIEW_INDEX_BOTH,   // Multiview; mesh shader output values also depend on the view index.
};
1825
1826 struct MaxMeshOutputParams
1827 {
1828 bool usePayload;
1829 LocationType locationType;
1830 ViewIndexType viewIndexType;
1831
isMultiViewvkt::MeshShader::__anon1a407c490111::MaxMeshOutputParams1832 bool isMultiView(void) const
1833 {
1834 return (viewIndexType != ViewIndexType::NO_VIEW_INDEX);
1835 }
1836
viewIndexInMeshvkt::MeshShader::__anon1a407c490111::MaxMeshOutputParams1837 bool viewIndexInMesh(void) const
1838 {
1839 return (viewIndexType == ViewIndexType::VIEW_INDEX_BOTH);
1840 }
1841 };
1842
1843 class MaxMeshOutputSizeCase : public vkt::TestCase
1844 {
1845 public:
MaxMeshOutputSizeCase(tcu::TestContext & testCtx,const std::string & name,const MaxMeshOutputParams & params)1846 MaxMeshOutputSizeCase(tcu::TestContext &testCtx, const std::string &name, const MaxMeshOutputParams ¶ms)
1847 : vkt::TestCase(testCtx, name)
1848 , m_params(params)
1849 {
1850 }
~MaxMeshOutputSizeCase(void)1851 virtual ~MaxMeshOutputSizeCase(void)
1852 {
1853 }
1854
1855 TestInstance *createInstance(Context &context) const override;
1856 void checkSupport(Context &context) const override;
1857 void initPrograms(vk::SourceCollections &programCollection) const override;
1858
1859 // Small-ish numbers allow for more fine-grained control in the amount of memory, but it can't be too small or we hit the locations limit.
1860 static constexpr uint32_t kMaxPoints = 96u;
1861 static constexpr uint32_t kNumViews = 2u; // For the multiView case.
1862
1863 protected:
1864 static constexpr uint32_t kUvec4Size = 16u; // We'll use 4 scalars at a time in the form of a uvec4.
1865 static constexpr uint32_t kUvec4Comp = 4u; // 4 components per uvec4.
1866 static constexpr uint32_t kPayloadElementSize = 4u; // Each payload element will be a uint.
1867
1868 struct ParamsFromContext
1869 {
1870 uint32_t payloadElements;
1871 uint32_t locationCount;
1872 };
1873 ParamsFromContext getParamsFromContext(Context &context) const;
1874
1875 const MaxMeshOutputParams m_params;
1876 };
1877
1878 class MaxMeshOutputSizeInstance : public SpecConstantInstance
1879 {
1880 public:
MaxMeshOutputSizeInstance(Context & context,SpecConstVector && vec,uint32_t numViews)1881 MaxMeshOutputSizeInstance(Context &context, SpecConstVector &&vec, uint32_t numViews)
1882 : SpecConstantInstance(context, std::move(vec))
1883 , m_numViews(numViews)
1884 {
1885 }
~MaxMeshOutputSizeInstance(void)1886 virtual ~MaxMeshOutputSizeInstance(void)
1887 {
1888 }
1889
1890 tcu::TestStatus iterate(void) override;
1891
1892 protected:
1893 Move<VkRenderPass> makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device, uint32_t layerCount,
1894 VkFormat format);
1895
1896 const uint32_t m_numViews;
1897 };
1898
checkSupport(Context & context) const1899 void MaxMeshOutputSizeCase::checkSupport(Context &context) const
1900 {
1901 checkTaskMeshShaderSupportEXT(context, m_params.usePayload /*requireTask*/, true /*requireMesh*/);
1902
1903 if (m_params.isMultiView())
1904 {
1905 const auto &multiviewFeatures = context.getMultiviewFeatures();
1906 if (!multiviewFeatures.multiview)
1907 TCU_THROW(NotSupportedError, "Multiview not supported");
1908
1909 const auto &meshFeatures = context.getMeshShaderFeaturesEXT();
1910 if (!meshFeatures.multiviewMeshShader)
1911 TCU_THROW(NotSupportedError, "Multiview not supported for mesh shaders");
1912
1913 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1914 if (meshProperties.maxMeshMultiviewViewCount < kNumViews)
1915 TCU_THROW(NotSupportedError, "maxMeshMultiviewViewCount too low");
1916 }
1917 }
1918
getParamsFromContext(Context & context) const1919 MaxMeshOutputSizeCase::ParamsFromContext MaxMeshOutputSizeCase::getParamsFromContext(Context &context) const
1920 {
1921 const auto &meshProperties = context.getMeshShaderPropertiesEXT();
1922 const auto maxOutSize =
1923 std::min(meshProperties.maxMeshOutputMemorySize, meshProperties.maxMeshPayloadAndOutputMemorySize);
1924 const auto maxMeshPayloadSize =
1925 std::min(meshProperties.maxMeshPayloadAndSharedMemorySize, meshProperties.maxMeshPayloadAndOutputMemorySize);
1926 const auto maxTaskPayloadSize =
1927 std::min(meshProperties.maxTaskPayloadSize, meshProperties.maxTaskPayloadAndSharedMemorySize);
1928 const auto maxPayloadSize = std::min(maxMeshPayloadSize, maxTaskPayloadSize);
1929 const auto numViewFactor = (m_params.viewIndexInMesh() ? kNumViews : 1u);
1930
1931 uint32_t payloadSize;
1932 uint32_t outSize;
1933
1934 if (m_params.usePayload)
1935 {
1936 const auto totalMax = maxOutSize + maxPayloadSize;
1937
1938 if (totalMax <= meshProperties.maxMeshPayloadAndOutputMemorySize)
1939 {
1940 payloadSize = maxPayloadSize;
1941 outSize = maxOutSize;
1942 }
1943 else
1944 {
1945 payloadSize = maxPayloadSize;
1946 outSize = meshProperties.maxMeshPayloadAndOutputMemorySize - payloadSize;
1947 }
1948 }
1949 else
1950 {
1951 payloadSize = 0u;
1952 outSize = maxOutSize;
1953 }
1954
1955 // This uses the equation in "Mesh Shader Output" spec section. Note per-vertex data already has gl_Position and gl_PointSize.
1956 // Also note gl_PointSize uses 1 effective location (4 scalar components) despite being a float.
1957 const auto granularity =
1958 ((m_params.locationType == LocationType::PER_PRIMITIVE) ? meshProperties.meshOutputPerPrimitiveGranularity :
1959 meshProperties.meshOutputPerVertexGranularity);
1960 const auto actualPoints = de::roundUp(kMaxPoints, granularity);
1961 const auto sizeMultiplier = actualPoints * kUvec4Size;
1962 const auto builtinDataSize = (16u /*gl_Position*/ + 16u /*gl_PointSize*/) * actualPoints;
1963 const auto locationsDataSize = (outSize - builtinDataSize) / numViewFactor;
1964 const auto maxTotalLocations =
1965 meshProperties.maxMeshOutputComponents / kUvec4Comp - 2u; // gl_Position and gl_PointSize use 1 location each.
1966 const auto locationCount = std::min(locationsDataSize / sizeMultiplier, maxTotalLocations);
1967
1968 ParamsFromContext params;
1969 params.payloadElements = payloadSize / kPayloadElementSize;
1970 params.locationCount = locationCount;
1971
1972 auto &log = context.getTestContext().getLog();
1973 {
1974 const auto actualOuputSize = builtinDataSize + locationCount * sizeMultiplier * numViewFactor;
1975
1976 log << tcu::TestLog::Message << "Payload elements: " << params.payloadElements << tcu::TestLog::EndMessage;
1977 log << tcu::TestLog::Message << "Location count: " << params.locationCount << tcu::TestLog::EndMessage;
1978 log << tcu::TestLog::Message
1979 << "Max mesh payload and output size (bytes): " << meshProperties.maxMeshPayloadAndOutputMemorySize
1980 << tcu::TestLog::EndMessage;
1981 log << tcu::TestLog::Message << "Max output size (bytes): " << maxOutSize << tcu::TestLog::EndMessage;
1982 log << tcu::TestLog::Message << "Payload size (bytes): " << payloadSize << tcu::TestLog::EndMessage;
1983 log << tcu::TestLog::Message << "Output data size (bytes): " << actualOuputSize << tcu::TestLog::EndMessage;
1984 log << tcu::TestLog::Message << "Output + payload size (bytes): " << (payloadSize + actualOuputSize)
1985 << tcu::TestLog::EndMessage;
1986 }
1987
1988 return params;
1989 }
1990
createInstance(Context & context) const1991 TestInstance *MaxMeshOutputSizeCase::createInstance(Context &context) const
1992 {
1993 const auto ctxParams = getParamsFromContext(context);
1994 SpecConstVector specConstVec{ctxParams.payloadElements, ctxParams.locationCount};
1995 const auto numViews = (m_params.isMultiView() ? kNumViews : 1u);
1996
1997 return new MaxMeshOutputSizeInstance(context, std::move(specConstVec), numViews);
1998 }
1999
initPrograms(vk::SourceCollections & programCollection) const2000 void MaxMeshOutputSizeCase::initPrograms(vk::SourceCollections &programCollection) const
2001 {
2002 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2003 const std::string locationQualifier =
2004 ((m_params.locationType == LocationType::PER_PRIMITIVE) ? "perprimitiveEXT" : "");
2005 const std::string multiViewExtDecl = "#extension GL_EXT_multiview : enable\n";
2006
2007 const std::string scDecl = "layout (constant_id=0) const uint payloadElements = 1u;\n"
2008 "layout (constant_id=1) const uint locationCount = 1u;\n";
2009
2010 std::string taskPayload;
2011 std::string payloadVerification = " bool payloadOK = true;\n";
2012 std::string locStruct = "struct LocationBlock {\n"
2013 " uvec4 elements[locationCount];\n"
2014 "};\n";
2015
2016 if (m_params.usePayload)
2017 {
2018 taskPayload = "struct TaskData {\n"
2019 " uint elements[payloadElements];\n"
2020 "};\n"
2021 "taskPayloadSharedEXT TaskData td;\n";
2022
2023 std::ostringstream task;
2024 task << "#version 450\n"
2025 << "#extension GL_EXT_mesh_shader : enable\n"
2026 << "\n"
2027 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
2028 << scDecl << taskPayload << "\n"
2029 << "void main (void) {\n"
2030 << " for (uint i = 0; i < payloadElements; ++i) {\n"
2031 << " td.elements[i] = 1000000u + i;\n"
2032 << " }\n"
2033 << " EmitMeshTasksEXT(1u, 1u, 1u);\n"
2034 << "}\n";
2035 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
2036
2037 payloadVerification += " for (uint i = 0; i < payloadElements; ++i) {\n"
2038 " if (td.elements[i] != 1000000u + i) {\n"
2039 " payloadOK = false;\n"
2040 " break;\n"
2041 " }\n"
2042 " }\n";
2043 }
2044
2045 // Do values depend on view indices?
2046 const bool valFromViewIndex = m_params.viewIndexInMesh();
2047 const std::string extraCompOffset = (valFromViewIndex ? "(4u * uint(gl_ViewIndex))" : "0u");
2048
2049 {
2050 const std::string multiViewExt = (valFromViewIndex ? multiViewExtDecl : "");
2051
2052 std::ostringstream mesh;
2053 mesh << "#version 450\n"
2054 << "#extension GL_EXT_mesh_shader : enable\n"
2055 << multiViewExt << "\n"
2056 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
2057 << "layout (points) out;\n"
2058 << "layout (max_vertices=" << kMaxPoints << ", max_primitives=" << kMaxPoints << ") out;\n"
2059 << "\n"
2060 << "out gl_MeshPerVertexEXT {\n"
2061 << " vec4 gl_Position;\n"
2062 << " float gl_PointSize;\n"
2063 << "} gl_MeshVerticesEXT[];\n"
2064 << "\n"
2065 << scDecl << taskPayload << "\n"
2066 << locStruct << "layout (location=0) out " << locationQualifier << " LocationBlock loc[];\n"
2067 << "\n"
2068 << "void main (void) {\n"
2069 << payloadVerification << "\n"
2070 << " SetMeshOutputsEXT(" << kMaxPoints << ", " << kMaxPoints << ");\n"
2071 << " const uint payloadOffset = (payloadOK ? 10u : 0u);\n"
2072 << " const uint compOffset = " << extraCompOffset << ";\n"
2073 << " for (uint pointIdx = 0u; pointIdx < " << kMaxPoints << "; ++pointIdx) {\n"
2074 << " const float xCoord = ((float(pointIdx) + 0.5) / float(" << kMaxPoints << ")) * 2.0 - 1.0;\n"
2075 << " gl_MeshVerticesEXT[pointIdx].gl_Position = vec4(xCoord, 0.0, 0.0, 1.0);\n"
2076 << " gl_MeshVerticesEXT[pointIdx].gl_PointSize = 1.0f;\n"
2077 << " gl_PrimitivePointIndicesEXT[pointIdx] = pointIdx;\n"
2078 << " for (uint elemIdx = 0; elemIdx < locationCount; ++elemIdx) {\n"
2079 << " const uint baseVal = 200000000u + 100000u * pointIdx + 1000u * elemIdx + payloadOffset;\n"
2080 << " loc[pointIdx].elements[elemIdx] = uvec4(baseVal + 1u + compOffset, baseVal + 2u + "
2081 "compOffset, baseVal + 3u + compOffset, baseVal + 4u + compOffset);\n"
2082 << " }\n"
2083 << " }\n"
2084 << "}\n";
2085 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2086 }
2087
2088 {
2089 const std::string multiViewExt = (m_params.isMultiView() ? multiViewExtDecl : "");
2090 const std::string outColorMod = (m_params.isMultiView() ? " outColor.r += float(gl_ViewIndex);\n" : "");
2091
2092 std::ostringstream frag;
2093 frag << "#version 450\n"
2094 << "#extension GL_EXT_mesh_shader : enable\n"
2095 << multiViewExt << "\n"
2096 << "layout (location=0) out vec4 outColor;\n"
2097 << scDecl << locStruct << "layout (location=0) in flat " << locationQualifier << " LocationBlock loc;\n"
2098 << "\n"
2099 << "void main (void) {\n"
2100 << " bool pointOK = true;\n"
2101 << " const uint pointIdx = uint(gl_FragCoord.x);\n"
2102 << " const uint expectedPayloadOffset = 10u;\n"
2103 << " const uint compOffset = " << extraCompOffset << ";\n"
2104 << " for (uint elemIdx = 0; elemIdx < locationCount; ++elemIdx) {\n"
2105 << " const uint baseVal = 200000000u + 100000u * pointIdx + 1000u * elemIdx + "
2106 "expectedPayloadOffset;\n"
2107 << " const uvec4 expectedVal = uvec4(baseVal + 1u + compOffset, baseVal + 2u + compOffset, baseVal "
2108 "+ 3u + compOffset, baseVal + 4u + compOffset);\n"
2109 << " if (loc.elements[elemIdx] != expectedVal) {\n"
2110 << " pointOK = false;\n"
2111 << " break;\n"
2112 << " }\n"
2113 << " }\n"
2114 << " const vec4 okColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
2115 << " const vec4 failColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
2116 << " outColor = (pointOK ? okColor : failColor);\n"
2117 << outColorMod << "}\n";
2118 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
2119 }
2120 }
2121
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device,uint32_t layerCount,VkFormat format)2122 Move<VkRenderPass> MaxMeshOutputSizeInstance::makeCustomRenderPass(const DeviceInterface &vkd, VkDevice device,
2123 uint32_t layerCount, VkFormat format)
2124 {
2125 DE_ASSERT(layerCount > 0u);
2126
2127 const VkAttachmentDescription colorAttachmentDescription = {
2128 0u, // VkAttachmentDescriptionFlags flags
2129 format, // VkFormat format
2130 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples
2131 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp
2132 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp
2133 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp
2134 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp
2135 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout
2136 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout
2137 };
2138
2139 const VkAttachmentReference colorAttachmentRef =
2140 makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
2141
2142 const VkSubpassDescription subpassDescription = {
2143 0u, // VkSubpassDescriptionFlags flags
2144 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint
2145 0u, // uint32_t inputAttachmentCount
2146 nullptr, // const VkAttachmentReference* pInputAttachments
2147 1u, // uint32_t colorAttachmentCount
2148 &colorAttachmentRef, // const VkAttachmentReference* pColorAttachments
2149 nullptr, // const VkAttachmentReference* pResolveAttachments
2150 nullptr, // const VkAttachmentReference* pDepthStencilAttachment
2151 0u, // uint32_t preserveAttachmentCount
2152 nullptr // const uint32_t* pPreserveAttachments
2153 };
2154
2155 const uint32_t viewMask = ((1u << layerCount) - 1u);
2156 const VkRenderPassMultiviewCreateInfo multiviewCreateInfo = {
2157 VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, // VkStructureType sType;
2158 nullptr, // const void* pNext;
2159 1u, // uint32_t subpassCount;
2160 &viewMask, // const uint32_t* pViewMasks;
2161 0u, // uint32_t dependencyCount;
2162 nullptr, // const int32_t* pViewOffsets;
2163 1u, // uint32_t correlationMaskCount;
2164 &viewMask, // const uint32_t* pCorrelationMasks;
2165 };
2166
2167 const void *pNext = ((layerCount > 1u) ? &multiviewCreateInfo : nullptr);
2168
2169 const VkRenderPassCreateInfo renderPassInfo = {
2170 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType
2171 pNext, // const void* pNext
2172 0u, // VkRenderPassCreateFlags flags
2173 1u, // uint32_t attachmentCount
2174 &colorAttachmentDescription, // const VkAttachmentDescription* pAttachments
2175 1u, // uint32_t subpassCount
2176 &subpassDescription, // const VkSubpassDescription* pSubpasses
2177 0u, // uint32_t dependencyCount
2178 nullptr, // const VkSubpassDependency* pDependencies
2179 };
2180
2181 return createRenderPass(vkd, device, &renderPassInfo);
2182 }
2183
iterate(void)2184 tcu::TestStatus MaxMeshOutputSizeInstance::iterate(void)
2185 {
2186 const auto &vkd = m_context.getDeviceInterface();
2187 const auto device = m_context.getDevice();
2188 auto &alloc = m_context.getDefaultAllocator();
2189 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
2190 const auto queue = m_context.getUniversalQueue();
2191
2192 const auto colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
2193 const auto tcuColorFormat = mapVkFormat(colorFormat);
2194 const auto pixelSize = static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
2195 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2196 const auto imageViewType = ((m_numViews > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
2197 const auto fbExtent = makeExtent3D(MaxMeshOutputSizeCase::kMaxPoints, 1u, 1u);
2198 const tcu::IVec3 iExtent3D(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
2199 static_cast<int>(m_numViews));
2200 const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 1.0f);
2201 const tcu::Vec4 expectedColor(0.0f, 0.0f, 1.0f, 1.0f);
2202 const tcu::Vec4 colorThreshold(0.0f, 0.0f, 0.0f, 0.0f);
2203
2204 // Create color attachment.
2205 const VkImageCreateInfo colorAttachmentCreatInfo = {
2206 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2207 nullptr, // const void* pNext;
2208 0u, // VkImageCreateFlags flags;
2209 VK_IMAGE_TYPE_2D, // VkImageType imageType;
2210 colorFormat, // VkFormat format;
2211 fbExtent, // VkExtent3D extent;
2212 1u, // uint32_t mipLevels;
2213 m_numViews, // uint32_t arrayLayers;
2214 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2215 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
2216 colorUsage, // VkImageUsageFlags usage;
2217 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2218 0u, // uint32_t queueFamilyIndexCount;
2219 nullptr, // const uint32_t* pQueueFamilyIndices;
2220 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
2221 };
2222 ImageWithMemory colorAttachment(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
2223 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, m_numViews);
2224 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, m_numViews);
2225 const auto colorAttachmentView =
2226 makeImageView(vkd, device, colorAttachment.get(), imageViewType, colorFormat, colorSRR);
2227
2228 // Verification buffer for the color attachment.
2229 DE_ASSERT(fbExtent.depth == 1u);
2230 const auto verificationBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2231 const auto verificationBufferSize =
2232 static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * m_numViews);
2233 const auto verificationBufferCreateInfo = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2234 BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferCreateInfo,
2235 MemoryRequirement::HostVisible);
2236 auto &verificationBufferAlloc = verificationBuffer.getAllocation();
2237 void *verificationBufferData = verificationBufferAlloc.getHostPtr();
2238
2239 deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
2240
2241 const auto pipelineLayout = makePipelineLayout(vkd, device);
2242 const auto renderPass = makeCustomRenderPass(vkd, device, m_numViews, colorFormat);
2243 const auto framebuffer =
2244 makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
2245
2246 const auto &binaries = m_context.getBinaryCollection();
2247 const bool hasTask = binaries.contains("task");
2248 const auto taskModule = (hasTask ? createShaderModule(vkd, device, binaries.get("task")) : Move<VkShaderModule>());
2249 const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
2250 const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
2251
2252 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
2253 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
2254
2255 const auto specMap = makeSpecializationMap();
2256 const VkSpecializationInfo specInfo = {
2257 static_cast<uint32_t>(specMap.size()), // uint32_t mapEntryCount;
2258 de::dataOrNull(specMap), // const VkSpecializationMapEntry* pMapEntries;
2259 de::dataSize(m_specConstants), // size_t dataSize;
2260 de::dataOrNull(m_specConstants), // const void* pData;
2261 };
2262
2263 std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
2264 VkPipelineShaderStageCreateInfo stageInfo = {
2265 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2266 nullptr, // const void* pNext;
2267 0u, // VkPipelineShaderStageCreateFlags flags;
2268 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
2269 DE_NULL, // VkShaderModule module;
2270 "main", // const char* pName;
2271 &specInfo, // const VkSpecializationInfo* pSpecializationInfo;
2272 };
2273
2274 if (hasTask)
2275 {
2276 stageInfo.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
2277 stageInfo.module = taskModule.get();
2278 shaderStages.push_back(stageInfo);
2279 }
2280
2281 {
2282 stageInfo.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
2283 stageInfo.module = meshModule.get();
2284 shaderStages.push_back(stageInfo);
2285 }
2286
2287 {
2288 stageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
2289 stageInfo.module = fragModule.get();
2290 shaderStages.push_back(stageInfo);
2291 }
2292
2293 const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
2294 renderPass.get(), viewports, scissors);
2295
2296 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
2297 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2298 const auto cmdBuffer = cmdBufferPtr.get();
2299
2300 beginCommandBuffer(vkd, cmdBuffer);
2301 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2302 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2303 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
2304 endRenderPass(vkd, cmdBuffer);
2305
2306 const auto preTransferBarrier = makeImageMemoryBarrier(
2307 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2308 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
2309 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
2310 VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
2311
2312 const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
2313 vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2314 verificationBuffer.get(), 1u, ©Region);
2315
2316 const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
2317 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
2318 &postTransferBarrier);
2319
2320 endCommandBuffer(vkd, cmdBuffer);
2321 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2322
2323 invalidateAlloc(vkd, device, verificationBufferAlloc);
2324 tcu::ConstPixelBufferAccess resultAccess(tcuColorFormat, iExtent3D, verificationBufferData);
2325 tcu::TextureLevel referenceLevel(tcuColorFormat, iExtent3D.x(), iExtent3D.y(), iExtent3D.z());
2326 tcu::PixelBufferAccess referenceAccess = referenceLevel.getAccess();
2327
2328 for (int z = 0; z < iExtent3D.z(); ++z)
2329 {
2330 const auto layer = tcu::getSubregion(referenceAccess, 0, 0, z, iExtent3D.x(), iExtent3D.y(), 1);
2331 const tcu::Vec4 expectedLayerColor(static_cast<float>(z), expectedColor.y(), expectedColor.z(),
2332 expectedColor.w());
2333 tcu::clear(layer, expectedLayerColor);
2334 }
2335
2336 auto &log = m_context.getTestContext().getLog();
2337 if (!tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, colorThreshold,
2338 tcu::COMPARE_LOG_ON_ERROR))
2339 TCU_FAIL("Check log for details");
2340
2341 return tcu::TestStatus::pass("Pass");
2342 }
2343
2344 } // namespace
2345
createMeshShaderPropertyTestsEXT(tcu::TestContext & testCtx)2346 tcu::TestCaseGroup *createMeshShaderPropertyTestsEXT(tcu::TestContext &testCtx)
2347 {
2348 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
2349
2350 // Tests checking mesh shading properties
2351 GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "properties"));
2352
2353 const struct
2354 {
2355 PayLoadShMemSizeType testType;
2356 const char *name;
2357 } taskPayloadShMemCases[] = {
2358 {PayLoadShMemSizeType::PAYLOAD, "task_payload_size"},
2359 {PayLoadShMemSizeType::SHARED_MEMORY, "task_shared_memory_size"},
2360 {PayLoadShMemSizeType::BOTH, "task_payload_and_shared_memory_size"},
2361 };
2362
2363 for (const auto &taskPayloadShMemCase : taskPayloadShMemCases)
2364 {
2365 const TaskPayloadShMemSizeParams params{taskPayloadShMemCase.testType};
2366 mainGroup->addChild(new TaskPayloadShMemSizeCase(testCtx, taskPayloadShMemCase.name, params));
2367 }
2368
2369 mainGroup->addChild(new MaxViewIndexCase(testCtx, "max_view_index"));
2370 mainGroup->addChild(new MaxOutputLayersCase(testCtx, "max_output_layers"));
2371
2372 const struct
2373 {
2374 MaxPrimVertType limitPrimVertType;
2375 const char *prefix;
2376 } limitPrimVertCases[] = {
2377 {MaxPrimVertType::PRIMITIVES, "max_mesh_output_primitives_"},
2378 {MaxPrimVertType::VERTICES, "max_mesh_output_vertices_"},
2379 };
2380
2381 const uint32_t itemCounts[] = {256u, 512u, 1024u, 2048u};
2382
2383 for (const auto &primVertCase : limitPrimVertCases)
2384 {
2385 for (const auto &count : itemCounts)
2386 {
2387 const MaxPrimVertParams params{primVertCase.limitPrimVertType, count};
2388 mainGroup->addChild(
2389 new MaxMeshOutputPrimVertCase(testCtx, primVertCase.prefix + std::to_string(count), params));
2390 }
2391 }
2392
2393 mainGroup->addChild(new MaxMeshOutputComponentsCase(testCtx, "max_mesh_output_components"));
2394
2395 const struct
2396 {
2397 PayLoadShMemSizeType testType;
2398 const char *name;
2399 } meshPayloadShMemCases[] = {
2400 // No actual property for the first one, combines the two properties involving payload size.
2401 {PayLoadShMemSizeType::PAYLOAD, "mesh_payload_size"},
2402 {PayLoadShMemSizeType::SHARED_MEMORY, "mesh_shared_memory_size"},
2403 {PayLoadShMemSizeType::BOTH, "mesh_payload_and_shared_memory_size"},
2404 };
2405 for (const auto &meshPayloadShMemCase : meshPayloadShMemCases)
2406 {
2407 const MeshPayloadShMemSizeParams params{meshPayloadShMemCase.testType};
2408 mainGroup->addChild(new MeshPayloadShMemSizeCase(testCtx, meshPayloadShMemCase.name, params));
2409 }
2410
2411 const struct
2412 {
2413 bool usePayload;
2414 const char *suffix;
2415 } meshOutputPayloadCases[] = {
2416 {false, "_without_payload"},
2417 {true, "_with_payload"},
2418 };
2419
2420 const struct
2421 {
2422 LocationType locationType;
2423 const char *suffix;
2424 } locationTypeCases[] = {
2425 {LocationType::PER_PRIMITIVE, "_per_primitive"},
2426 {LocationType::PER_VERTEX, "_per_vertex"},
2427 };
2428
2429 const struct
2430 {
2431 ViewIndexType viewIndexType;
2432 const char *suffix;
2433 } multiviewCases[] = {
2434 {ViewIndexType::NO_VIEW_INDEX, "_no_view_index"},
2435 {ViewIndexType::VIEW_INDEX_FRAG, "_view_index_in_frag"},
2436 {ViewIndexType::VIEW_INDEX_BOTH, "_view_index_in_mesh_and_frag"},
2437 };
2438
2439 for (const auto &meshOutputPayloadCase : meshOutputPayloadCases)
2440 {
2441 for (const auto &locationTypeCase : locationTypeCases)
2442 {
2443 for (const auto &multiviewCase : multiviewCases)
2444 {
2445 const std::string name = std::string("max_mesh_output_size") + meshOutputPayloadCase.suffix +
2446 locationTypeCase.suffix + multiviewCase.suffix;
2447 const MaxMeshOutputParams params = {
2448 meshOutputPayloadCase.usePayload, // bool usePayload;
2449 locationTypeCase.locationType, // LocationType locationType;
2450 multiviewCase.viewIndexType, // ViewIndexType viewIndexType;
2451 };
2452
2453 mainGroup->addChild(new MaxMeshOutputSizeCase(testCtx, name, params));
2454 }
2455 }
2456 }
2457
2458 return mainGroup.release();
2459 }
2460 } // namespace MeshShader
2461 } // namespace vkt
2462