xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Synchronization Tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderSyncTests.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 
29 #include "vkDefs.hpp"
30 #include "vkTypeUtil.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkImageUtil.hpp"
38 
39 #include "deUniquePtr.hpp"
40 
41 #include <iostream>
42 #include <sstream>
43 #include <vector>
44 
45 namespace vkt
46 {
47 namespace MeshShader
48 {
49 
50 namespace
51 {
52 
53 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
54 
55 using namespace vk;
56 
// Pipeline stages (and the host) that can act as the writer or the reader in these tests.
enum class Stage
{
    HOST     = 0, // Host access.
    TRANSFER = 1, // Transfer operations.
    TASK     = 2, // Task shader.
    MESH     = 3, // Mesh shader.
    FRAG     = 4, // Fragment shader.
};
66 
operator <<(std::ostream & stream,Stage stage)67 std::ostream &operator<<(std::ostream &stream, Stage stage)
68 {
69     switch (stage)
70     {
71     case Stage::HOST:
72         stream << "host";
73         break;
74     case Stage::TRANSFER:
75         stream << "transfer";
76         break;
77     case Stage::TASK:
78         stream << "task";
79         break;
80     case Stage::MESH:
81         stream << "mesh";
82         break;
83     case Stage::FRAG:
84         stream << "frag";
85         break;
86     default:
87         DE_ASSERT(false);
88         break;
89     }
90 
91     return stream;
92 }
93 
isShaderStage(Stage stage)94 bool isShaderStage(Stage stage)
95 {
96     return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG);
97 }
98 
stageToFlags(Stage stage)99 VkPipelineStageFlags stageToFlags(Stage stage)
100 {
101     switch (stage)
102     {
103     case Stage::HOST:
104         return VK_PIPELINE_STAGE_HOST_BIT;
105     case Stage::TRANSFER:
106         return VK_PIPELINE_STAGE_TRANSFER_BIT;
107     case Stage::TASK:
108         return VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV;
109     case Stage::MESH:
110         return VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV;
111     case Stage::FRAG:
112         return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
113     default:
114         DE_ASSERT(false);
115         break;
116     }
117 
118     // Unreachable.
119     DE_ASSERT(false);
120     return 0u;
121 }
122 
getImageFormat()123 VkFormat getImageFormat()
124 {
125     return VK_FORMAT_R32_UINT;
126 }
127 
getImageExtent()128 VkExtent3D getImageExtent()
129 {
130     return makeExtent3D(1u, 1u, 1u);
131 }
132 
// Kinds of resources the test value can travel through.
enum class ResourceType
{
    UNIFORM_BUFFER = 0, // Bound as a uniform buffer descriptor.
    STORAGE_BUFFER = 1, // Bound as a storage buffer descriptor.
    STORAGE_IMAGE  = 2, // Bound as a storage image descriptor.
    SAMPLED_IMAGE  = 3, // Bound as a combined image sampler descriptor.
};
141 
resourceTypeToDescriptor(ResourceType resType)142 VkDescriptorType resourceTypeToDescriptor(ResourceType resType)
143 {
144     switch (resType)
145     {
146     case ResourceType::UNIFORM_BUFFER:
147         return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
148     case ResourceType::STORAGE_BUFFER:
149         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
150     case ResourceType::STORAGE_IMAGE:
151         return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
152     case ResourceType::SAMPLED_IMAGE:
153         return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
154     default:
155         DE_ASSERT(false);
156         break;
157     }
158 
159     // Unreachable.
160     DE_ASSERT(false);
161     return VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
162 }
163 
// Selects whether the test uses a general memory barrier or a barrier specific to the resource.
enum class BarrierType
{
    GENERAL  = 0, // General memory barrier.
    SPECIFIC = 1, // Specific barrier.
};
170 
// Kinds of write access used to produce the test value.
enum class WriteAccess
{
    HOST_WRITE     = 0, // Written by the host.
    TRANSFER_WRITE = 1, // Written by a transfer operation.
    SHADER_WRITE   = 2, // Written by a shader.
};
178 
writeAccessToFlags(WriteAccess access)179 VkAccessFlags writeAccessToFlags(WriteAccess access)
180 {
181     switch (access)
182     {
183     case WriteAccess::HOST_WRITE:
184         return VK_ACCESS_HOST_WRITE_BIT;
185     case WriteAccess::TRANSFER_WRITE:
186         return VK_ACCESS_TRANSFER_WRITE_BIT;
187     case WriteAccess::SHADER_WRITE:
188         return VK_ACCESS_SHADER_WRITE_BIT;
189     default:
190         DE_ASSERT(false);
191         break;
192     }
193 
194     // Unreachable.
195     DE_ASSERT(false);
196     return 0u;
197 }
198 
// Kinds of read access used to consume the test value.
enum class ReadAccess
{
    HOST_READ     = 0, // Read by the host.
    TRANSFER_READ = 1, // Read by a transfer operation.
    SHADER_READ   = 2, // Read by a shader.
    UNIFORM_READ  = 3, // Read by a shader as uniform data.
};
207 
readAccessToFlags(ReadAccess access)208 VkAccessFlags readAccessToFlags(ReadAccess access)
209 {
210     switch (access)
211     {
212     case ReadAccess::HOST_READ:
213         return VK_ACCESS_HOST_READ_BIT;
214     case ReadAccess::TRANSFER_READ:
215         return VK_ACCESS_TRANSFER_READ_BIT;
216     case ReadAccess::SHADER_READ:
217         return VK_ACCESS_SHADER_READ_BIT;
218     case ReadAccess::UNIFORM_READ:
219         return VK_ACCESS_UNIFORM_READ_BIT;
220     default:
221         DE_ASSERT(false);
222         break;
223     }
224 
225     // Unreachable.
226     DE_ASSERT(false);
227     return 0u;
228 }
229 
230 // Auxiliary functions to verify certain combinations are possible.
231 
232 // Check if the writing stage can use the specified write access.
canWriteFromStageAsAccess(Stage writeStage,WriteAccess access)233 bool canWriteFromStageAsAccess(Stage writeStage, WriteAccess access)
234 {
235     switch (writeStage)
236     {
237     case Stage::HOST:
238         return (access == WriteAccess::HOST_WRITE);
239     case Stage::TRANSFER:
240         return (access == WriteAccess::TRANSFER_WRITE);
241     case Stage::TASK: // fallthrough
242     case Stage::MESH: // fallthrough
243     case Stage::FRAG:
244         return (access == WriteAccess::SHADER_WRITE);
245     default:
246         DE_ASSERT(false);
247         break;
248     }
249 
250     return false;
251 }
252 
253 // Check if the reading stage can use the specified read access.
canReadFromStageAsAccess(Stage readStage,ReadAccess access)254 bool canReadFromStageAsAccess(Stage readStage, ReadAccess access)
255 {
256     switch (readStage)
257     {
258     case Stage::HOST:
259         return (access == ReadAccess::HOST_READ);
260     case Stage::TRANSFER:
261         return (access == ReadAccess::TRANSFER_READ);
262     case Stage::TASK: // fallthrough
263     case Stage::MESH: // fallthrough
264     case Stage::FRAG:
265         return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ);
266     default:
267         DE_ASSERT(false);
268         break;
269     }
270 
271     return false;
272 }
273 
274 // Check if reading the given resource type is possible with the given type of read access.
canReadResourceAsAccess(ResourceType resType,ReadAccess access)275 bool canReadResourceAsAccess(ResourceType resType, ReadAccess access)
276 {
277     if (access == ReadAccess::UNIFORM_READ)
278         return (resType == ResourceType::UNIFORM_BUFFER);
279     return true;
280 }
281 
282 // Check if writing to the given resource type is possible with the given type of write access.
canWriteResourceAsAccess(ResourceType resType,WriteAccess access)283 bool canWriteResourceAsAccess(ResourceType resType, WriteAccess access)
284 {
285     if (resType == ResourceType::UNIFORM_BUFFER)
286         return (access != WriteAccess::SHADER_WRITE);
287     return true;
288 }
289 
290 // Check if the given stage can write to the given resource type.
canWriteTo(Stage stage,ResourceType resType)291 bool canWriteTo(Stage stage, ResourceType resType)
292 {
293     switch (stage)
294     {
295     case Stage::HOST:
296         return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
297     case Stage::TRANSFER:
298         return true;
299     case Stage::TASK: // fallthrough
300     case Stage::MESH:
301         return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE);
302     default:
303         DE_ASSERT(false);
304         break;
305     }
306 
307     return false;
308 }
309 
310 // Check if the given stage can read from the given resource type.
canReadFrom(Stage stage,ResourceType resType)311 bool canReadFrom(Stage stage, ResourceType resType)
312 {
313     switch (stage)
314     {
315     case Stage::HOST:
316         return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
317     case Stage::TRANSFER: // fallthrough
318     case Stage::TASK:     // fallthrough
319     case Stage::MESH:
320     case Stage::FRAG:
321         return true;
322     default:
323         DE_ASSERT(false);
324         break;
325     }
326 
327     return false;
328 }
329 
330 // Will we need to store the test value in an auxiliar buffer to be read?
needsAuxiliarSourceBuffer(Stage fromStage,Stage toStage)331 bool needsAuxiliarSourceBuffer(Stage fromStage, Stage toStage)
332 {
333     DE_UNREF(toStage);
334     return (fromStage == Stage::TRANSFER);
335 }
336 
337 // Will we need to store the read operation result into an auxiliar buffer to be checked?
needsAuxiliarDestBuffer(Stage fromStage,Stage toStage)338 bool needsAuxiliarDestBuffer(Stage fromStage, Stage toStage)
339 {
340     DE_UNREF(fromStage);
341     return (toStage == Stage::TRANSFER);
342 }
343 
344 // Needs any auxiliar buffer for any case?
needsAuxiliarBuffer(Stage fromStage,Stage toStage)345 bool needsAuxiliarBuffer(Stage fromStage, Stage toStage)
346 {
347     return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage));
348 }
349 
350 // Will the final value be stored in the auxiliar destination buffer?
valueInAuxiliarDestBuffer(Stage toStage)351 bool valueInAuxiliarDestBuffer(Stage toStage)
352 {
353     return (toStage == Stage::TRANSFER);
354 }
355 
356 // Will the final value be stored in the resource buffer itself?
valueInResourceBuffer(Stage toStage)357 bool valueInResourceBuffer(Stage toStage)
358 {
359     return (toStage == Stage::HOST);
360 }
361 
362 // Will the final value be stored in the color buffer?
valueInColorBuffer(Stage toStage)363 bool valueInColorBuffer(Stage toStage)
364 {
365     return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage));
366 }
367 
368 // Image usage flags for the image resource.
resourceImageUsageFlags(ResourceType resourceType)369 VkImageUsageFlags resourceImageUsageFlags(ResourceType resourceType)
370 {
371     VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
372 
373     switch (resourceType)
374     {
375     case ResourceType::STORAGE_IMAGE:
376         flags |= VK_IMAGE_USAGE_STORAGE_BIT;
377         break;
378     case ResourceType::SAMPLED_IMAGE:
379         flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
380         break;
381     default:
382         DE_ASSERT(false);
383         break;
384     }
385 
386     return flags;
387 }
388 
389 // Buffer usage flags for the buffer resource.
resourceBufferUsageFlags(ResourceType resourceType)390 VkBufferUsageFlags resourceBufferUsageFlags(ResourceType resourceType)
391 {
392     VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
393 
394     switch (resourceType)
395     {
396     case ResourceType::UNIFORM_BUFFER:
397         flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
398         break;
399     case ResourceType::STORAGE_BUFFER:
400         flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
401         break;
402     default:
403         DE_ASSERT(false);
404         break;
405     }
406 
407     return flags;
408 }
409 
410 // Is the resource written to and read from a shader stage?
readAndWriteFromShaders(Stage fromStage,Stage toStage)411 bool readAndWriteFromShaders(Stage fromStage, Stage toStage)
412 {
413     return (isShaderStage(fromStage) && isShaderStage(toStage));
414 }
415 
416 struct TestParams
417 {
418     Stage fromStage;
419     Stage toStage;
420     ResourceType resourceType;
421     BarrierType barrierType;
422     WriteAccess writeAccess;
423     ReadAccess readAccess;
424     uint32_t testValue;
425 
426 protected:
readsOrWritesInvkt::MeshShader::__anona9e283100111::TestParams427     bool readsOrWritesIn(Stage stage) const
428     {
429         DE_ASSERT(fromStage != toStage);
430         return (fromStage == stage || toStage == stage);
431     }
432 
433 public:
needsTaskvkt::MeshShader::__anona9e283100111::TestParams434     bool needsTask() const
435     {
436         return readsOrWritesIn(Stage::TASK);
437     }
438 
readsOrWritesInMeshvkt::MeshShader::__anona9e283100111::TestParams439     bool readsOrWritesInMesh() const
440     {
441         return readsOrWritesIn(Stage::MESH);
442     }
443 
getResourceDeclvkt::MeshShader::__anona9e283100111::TestParams444     std::string getResourceDecl() const
445     {
446         const auto imgFormat     = ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : "");
447         const auto storagePrefix = ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly ");
448         std::ostringstream decl;
449 
450         decl << "layout (set=0, binding=0" << imgFormat << ") ";
451         switch (resourceType)
452         {
453         case ResourceType::UNIFORM_BUFFER:
454             decl << "uniform UniformBuffer { uint value; } ub;";
455             break;
456         case ResourceType::STORAGE_BUFFER:
457             decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;";
458             break;
459         case ResourceType::STORAGE_IMAGE:
460             decl << storagePrefix << "uniform uimage2D si;";
461             break;
462         case ResourceType::SAMPLED_IMAGE:
463             decl << "uniform usampler2D sampled;";
464             break;
465         default:
466             DE_ASSERT(false);
467             break;
468         }
469 
470         decl << "\n";
471         return decl.str();
472     }
473 
474     struct PushConstantStruct
475     {
476         uint32_t writeVal;
477         uint32_t readVal;
478     };
479 
480     // Get declaration for the "pc" push constant block. Must match the structure above.
getPushConstantDeclvkt::MeshShader::__anona9e283100111::TestParams481     std::string getPushConstantDecl() const
482     {
483         std::ostringstream pc;
484         pc << "layout (push_constant, std430) uniform PushConstantBlock {\n"
485            << "    uint writeVal;\n"
486            << "    uint readVal;\n"
487            << "} pc;\n";
488         return pc.str();
489     }
490 
getReadStatementvkt::MeshShader::__anona9e283100111::TestParams491     std::string getReadStatement(const std::string &outName) const
492     {
493         std::ostringstream statement;
494         statement << "    if (pc.readVal > 0u) { " << outName << " = ";
495 
496         switch (resourceType)
497         {
498         case ResourceType::UNIFORM_BUFFER:
499             statement << "ub.value";
500             break;
501         case ResourceType::STORAGE_BUFFER:
502             statement << "sb.value";
503             break;
504         case ResourceType::STORAGE_IMAGE:
505             statement << "imageLoad(si, ivec2(0, 0)).x";
506             break;
507         case ResourceType::SAMPLED_IMAGE:
508             statement << "texture(sampled, vec2(0.5, 0.5)).x";
509             break;
510         default:
511             DE_ASSERT(false);
512             break;
513         }
514 
515         statement << "; }\n";
516         return statement.str();
517     }
518 
getWriteStatementvkt::MeshShader::__anona9e283100111::TestParams519     std::string getWriteStatement(const std::string &valueName) const
520     {
521         std::ostringstream statement;
522         statement << "    if (pc.writeVal > 0u) { ";
523 
524         switch (resourceType)
525         {
526         case ResourceType::STORAGE_BUFFER:
527             statement << "sb.value = " << valueName;
528             break;
529         case ResourceType::STORAGE_IMAGE:
530             statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))";
531             break;
532         case ResourceType::UNIFORM_BUFFER: // fallthrough
533         case ResourceType::SAMPLED_IMAGE:  // fallthrough
534         default:
535             DE_ASSERT(false);
536             break;
537         }
538 
539         statement << "; }\n";
540         return statement.str();
541     }
542 
getResourceShaderStagesvkt::MeshShader::__anona9e283100111::TestParams543     VkShaderStageFlags getResourceShaderStages() const
544     {
545         VkShaderStageFlags flags = 0u;
546 
547         if (fromStage == Stage::TASK || toStage == Stage::TASK)
548             flags |= VK_SHADER_STAGE_TASK_BIT_NV;
549         if (fromStage == Stage::MESH || toStage == Stage::MESH)
550             flags |= VK_SHADER_STAGE_MESH_BIT_NV;
551         if (fromStage == Stage::FRAG || toStage == Stage::FRAG)
552             flags |= VK_SHADER_STAGE_FRAGMENT_BIT;
553 
554         // We assume at least something must be done either on the task or mesh shaders for the tests to be interesting.
555         DE_ASSERT((flags & (VK_SHADER_STAGE_TASK_BIT_NV | VK_SHADER_STAGE_MESH_BIT_NV)) != 0u);
556         return flags;
557     }
558 
559     // We'll prefer to keep the image in the general layout if it will be written to from a shader stage or if the barrier is going to be a generic memory barrier.
preferGeneralLayoutvkt::MeshShader::__anona9e283100111::TestParams560     bool preferGeneralLayout() const
561     {
562         return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) ||
563                 (resourceType == ResourceType::STORAGE_IMAGE));
564     }
565 
566     // A subpass dependency is needed if both the source and destination stages are shader stages.
needsSubpassDependencyvkt::MeshShader::__anona9e283100111::TestParams567     bool needsSubpassDependency() const
568     {
569         return readAndWriteFromShaders(fromStage, toStage);
570     }
571 };
572 
573 class MeshShaderSyncCase : public vkt::TestCase
574 {
575 public:
MeshShaderSyncCase(tcu::TestContext & testCtx,const std::string & name,const TestParams & params)576     MeshShaderSyncCase(tcu::TestContext &testCtx, const std::string &name, const TestParams &params)
577         : vkt::TestCase(testCtx, name)
578         , m_params(params)
579     {
580     }
581 
~MeshShaderSyncCase(void)582     virtual ~MeshShaderSyncCase(void)
583     {
584     }
585 
586     void checkSupport(Context &context) const override;
587     void initPrograms(vk::SourceCollections &programCollection) const override;
588     TestInstance *createInstance(Context &context) const override;
589 
590 protected:
591     TestParams m_params;
592 };
593 
594 class MeshShaderSyncInstance : public vkt::TestInstance
595 {
596 public:
MeshShaderSyncInstance(Context & context,const TestParams & params)597     MeshShaderSyncInstance(Context &context, const TestParams &params) : vkt::TestInstance(context), m_params(params)
598     {
599     }
~MeshShaderSyncInstance(void)600     virtual ~MeshShaderSyncInstance(void)
601     {
602     }
603 
604     tcu::TestStatus iterate(void) override;
605 
606 protected:
607     TestParams m_params;
608 };
609 
checkSupport(Context & context) const610 void MeshShaderSyncCase::checkSupport(Context &context) const
611 {
612     checkTaskMeshShaderSupportNV(context, m_params.needsTask(), true);
613 
614     if (m_params.writeAccess == WriteAccess::SHADER_WRITE)
615     {
616         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
617     }
618 }
619 
initPrograms(vk::SourceCollections & programCollection) const620 void MeshShaderSyncCase::initPrograms(vk::SourceCollections &programCollection) const
621 {
622     const bool needsTaskShader = m_params.needsTask();
623     const auto valueStr        = de::toString(m_params.testValue);
624     const auto resourceDecl    = m_params.getResourceDecl();
625     const auto pcDecl          = m_params.getPushConstantDecl();
626 
627     if (needsTaskShader)
628     {
629 
630         std::ostringstream task;
631         task << "#version 450\n"
632              << "#extension GL_NV_mesh_shader : enable\n"
633              << "\n"
634              << "layout(local_size_x=1) in;\n"
635              << "\n"
636              << "out taskNV TaskData { uint value; } td;\n"
637              << "\n"
638              << resourceDecl << pcDecl << "\n"
639              << "void main ()\n"
640              << "{\n"
641              << "    gl_TaskCountNV = 1u;\n"
642              << "    td.value = 0u;\n"
643              << ((m_params.fromStage == Stage::TASK) ? m_params.getWriteStatement(valueStr) : "")
644              << ((m_params.toStage == Stage::TASK) ? m_params.getReadStatement("td.value") : "") << "}\n";
645         programCollection.glslSources.add("task") << glu::TaskSource(task.str());
646     }
647 
648     {
649         const bool rwInMesh = m_params.readsOrWritesInMesh();
650 
651         std::ostringstream mesh;
652         mesh << "#version 450\n"
653              << "#extension GL_NV_mesh_shader : enable\n"
654              << "\n"
655              << "layout(local_size_x=1) in;\n"
656              << "layout(triangles) out;\n"
657              << "layout(max_vertices=3, max_primitives=1) out;\n"
658              << "\n"
659              << (needsTaskShader ? "in taskNV TaskData { uint value; } td;\n" : "")
660              << "layout (location=0) out perprimitiveNV uint primitiveValue[];\n"
661              << "\n"
662              << (rwInMesh ? resourceDecl : "") << (rwInMesh ? pcDecl : "") << "\n"
663              << "void main ()\n"
664              << "{\n"
665              << "    gl_PrimitiveCountNV = 1u;\n"
666              << (needsTaskShader ? "    primitiveValue[0] = td.value;\n" : "")
667              << ((m_params.fromStage == Stage::MESH) ? m_params.getWriteStatement(valueStr) : "")
668              << ((m_params.toStage == Stage::MESH) ? m_params.getReadStatement("primitiveValue[0]") : "") << "\n"
669              << "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
670              << "    gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
671              << "    gl_MeshVerticesNV[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
672              << "    gl_PrimitiveIndicesNV[0] = 0;\n"
673              << "    gl_PrimitiveIndicesNV[1] = 1;\n"
674              << "    gl_PrimitiveIndicesNV[2] = 2;\n"
675              << "}\n";
676         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
677     }
678 
679     {
680         const bool readFromFrag = (m_params.toStage == Stage::FRAG);
681         std::ostringstream frag;
682 
683         frag << "#version 450\n"
684              << "#extension GL_NV_mesh_shader : enable\n"
685              << "\n"
686              << "layout (location=0) in perprimitiveNV flat uint primitiveValue;\n"
687              << "layout (location=0) out uvec4 outColor;\n"
688              << "\n"
689              << (readFromFrag ? resourceDecl : "") << (readFromFrag ? pcDecl : "") << "\n"
690              << "void main ()\n"
691              << "{\n"
692              << "    outColor = uvec4(primitiveValue, 0, 0, 0);\n"
693              << "    uint readVal = 0u;\n"
694              << (readFromFrag ? m_params.getReadStatement("readVal") : "")
695              << (readFromFrag ? "    outColor = uvec4(readVal, 0, 0, 0);\n" : "") << "}\n";
696         programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
697     }
698 }
699 
createInstance(Context & context) const700 TestInstance *MeshShaderSyncCase::createInstance(Context &context) const
701 {
702     return new MeshShaderSyncInstance(context, m_params);
703 }
704 
705 // General description behind these tests.
706 //
707 //    From                To
708 // ==============================
709 //    HOST                TASK            Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer.
710 //    HOST                MESH            Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer.
711 //    TRANSFER            TASK            Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer.
712 //    TRANSFER            MESH            Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer.
713 //    TASK                MESH            Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer.
714 //    TASK                FRAG            Same write procedure and restrictions. Read from frag into outColor. Verify color buffer.
715 //    TASK                TRANSFER        Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer.
716 //    TASK                HOST            Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly.
717 //    MESH                FRAG            Same as task to frag but the write instructions need to be in the mesh shader.
718 //    MESH                TRANSFER        Same as task to transfer but the write instructions need to be in the mesh shader.
719 //    MESH                HOST            Same as task to host but the write instructions need to be in the mesh shader.
720 //
721 
createCustomRenderPass(const DeviceInterface & vkd,VkDevice device,VkFormat colorFormat,const TestParams & params)722 Move<VkRenderPass> createCustomRenderPass(const DeviceInterface &vkd, VkDevice device, VkFormat colorFormat,
723                                           const TestParams &params)
724 {
725     const std::vector<VkAttachmentDescription> attachmentDescs = {{
726         0u,                                       // VkAttachmentDescriptionFlags flags;
727         colorFormat,                              // VkFormat format;
728         VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits samples;
729         VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp loadOp;
730         VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp storeOp;
731         VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp stencilLoadOp;
732         VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp stencilStoreOp;
733         VK_IMAGE_LAYOUT_UNDEFINED,                // VkImageLayout initialLayout;
734         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
735     }};
736 
737     const std::vector<VkAttachmentReference> attachmentRefs = {{0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}};
738 
739     const std::vector<VkSubpassDescription> subpassDescs = {{
740         0u,                                           // VkSubpassDescriptionFlags flags;
741         VK_PIPELINE_BIND_POINT_GRAPHICS,              // VkPipelineBindPoint pipelineBindPoint;
742         0u,                                           // uint32_t inputAttachmentCount;
743         nullptr,                                      // const VkAttachmentReference* pInputAttachments;
744         static_cast<uint32_t>(attachmentRefs.size()), // uint32_t colorAttachmentCount;
745         de::dataOrNull(attachmentRefs),               // const VkAttachmentReference* pColorAttachments;
746         nullptr,                                      // const VkAttachmentReference* pResolveAttachments;
747         nullptr,                                      // const VkAttachmentReference* pDepthStencilAttachment;
748         0u,                                           // uint32_t preserveAttachmentCount;
749         nullptr,                                      // const uint32_t* pPreserveAttachments;
750     }};
751 
752     // When both stages are shader stages, the dependency will be expressed as a subpass dependency.
753     std::vector<VkSubpassDependency> dependencies;
754     if (params.needsSubpassDependency())
755     {
756         const VkSubpassDependency dependency = {
757             0u,                                     // uint32_t srcSubpass;
758             0u,                                     // uint32_t dstSubpass;
759             stageToFlags(params.fromStage),         // VkPipelineStageFlags srcStageMask;
760             stageToFlags(params.toStage),           // VkPipelineStageFlags dstStageMask;
761             writeAccessToFlags(params.writeAccess), // VkAccessFlags srcAccessMask;
762             readAccessToFlags(params.readAccess),   // VkAccessFlags dstAccessMask;
763             0u,                                     // VkDependencyFlags dependencyFlags;
764         };
765         dependencies.push_back(dependency);
766     }
767 
768     const VkRenderPassCreateInfo createInfo = {
769         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,     // VkStructureType sType;
770         nullptr,                                       // const void* pNext;
771         0u,                                            // VkRenderPassCreateFlags flags;
772         static_cast<uint32_t>(attachmentDescs.size()), // uint32_t attachmentCount;
773         de::dataOrNull(attachmentDescs),               // const VkAttachmentDescription* pAttachments;
774         static_cast<uint32_t>(subpassDescs.size()),    // uint32_t subpassCount;
775         de::dataOrNull(subpassDescs),                  // const VkSubpassDescription* pSubpasses;
776         static_cast<uint32_t>(dependencies.size()),    // uint32_t dependencyCount;
777         de::dataOrNull(dependencies),                  // const VkSubpassDependency* pDependencies;
778     };
779 
780     return createRenderPass(vkd, device, &createInfo);
781 }
782 
hostToTransferMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)783 void hostToTransferMemoryBarrier(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
784 {
785     const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
786     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &barrier, 0u,
787                            nullptr, 0u, nullptr);
788 }
789 
transferToHostMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)790 void transferToHostMemoryBarrier(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
791 {
792     const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
793     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &barrier, 0u,
794                            nullptr, 0u, nullptr);
795 }
796 
iterate(void)797 tcu::TestStatus MeshShaderSyncInstance::iterate(void)
798 {
799     const auto &vkd       = m_context.getDeviceInterface();
800     const auto device     = m_context.getDevice();
801     auto &alloc           = m_context.getDefaultAllocator();
802     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
803     const auto queue      = m_context.getUniversalQueue();
804 
805     const auto imageFormat         = getImageFormat();
806     const auto imageExtent         = getImageExtent();
807     const auto colorBufferUsage    = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
808     const auto colorSRR            = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
809     const auto colorSRL            = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
810     const auto bufferSize          = static_cast<VkDeviceSize>(sizeof(m_params.testValue));
811     const auto descriptorType      = resourceTypeToDescriptor(m_params.resourceType);
812     const auto resourceStages      = m_params.getResourceShaderStages();
813     const auto auxiliarBufferUsage = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
814     const auto useGeneralLayout    = m_params.preferGeneralLayout();
815     const bool needsTwoDrawCalls   = m_params.needsSubpassDependency();
816 
817     const auto writeAccessFlags = writeAccessToFlags(m_params.writeAccess);
818     const auto readAccessFlags  = readAccessToFlags(m_params.readAccess);
819     const auto fromStageFlags   = stageToFlags(m_params.fromStage);
820     const auto toStageFlags     = stageToFlags(m_params.toStage);
821 
822     // Prepare color buffer.
823     const VkImageCreateInfo colorBufferCreateInfo = {
824         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
825         nullptr,                             // const void* pNext;
826         0u,                                  // VkImageCreateFlags flags;
827         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
828         imageFormat,                         // VkFormat format;
829         imageExtent,                         // VkExtent3D extent;
830         1u,                                  // uint32_t mipLevels;
831         1u,                                  // uint32_t arrayLayers;
832         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
833         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
834         colorBufferUsage,                    // VkImageUsageFlags usage;
835         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
836         0u,                                  // uint32_t queueFamilyIndexCount;
837         nullptr,                             // const uint32_t* pQueueFamilyIndices;
838         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
839     };
840     ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
841     const auto colorBufferView =
842         makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
843 
844     // Main resource.
845     using ImageWithMemoryPtr  = de::MovePtr<ImageWithMemory>;
846     using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
847 
848     ImageWithMemoryPtr imageResource;
849     Move<VkImageView> imageResourceView;
850     VkImageLayout imageDescriptorLayout =
851         (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
852     VkImageLayout currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
853     BufferWithMemoryPtr bufferResource;
854 
855     bool useImageResource  = false;
856     bool useBufferResource = false;
857 
858     switch (m_params.resourceType)
859     {
860     case ResourceType::UNIFORM_BUFFER:
861     case ResourceType::STORAGE_BUFFER:
862         useBufferResource = true;
863         break;
864     case ResourceType::STORAGE_IMAGE:
865     case ResourceType::SAMPLED_IMAGE:
866         useImageResource = true;
867         break;
868     default:
869         DE_ASSERT(false);
870         break;
871     }
872 
873     // One resource needed.
874     DE_ASSERT(useImageResource != useBufferResource);
875 
876     if (useImageResource)
877     {
878         const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType);
879 
880         const VkImageCreateInfo resourceCreateInfo = {
881             VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
882             nullptr,                             // const void* pNext;
883             0u,                                  // VkImageCreateFlags flags;
884             VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
885             imageFormat,                         // VkFormat format;
886             imageExtent,                         // VkExtent3D extent;
887             1u,                                  // uint32_t mipLevels;
888             1u,                                  // uint32_t arrayLayers;
889             VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
890             VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
891             resourceImageUsage,                  // VkImageUsageFlags usage;
892             VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
893             0u,                                  // uint32_t queueFamilyIndexCount;
894             nullptr,                             // const uint32_t* pQueueFamilyIndices;
895             VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
896         };
897         imageResource =
898             ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any));
899         imageResourceView =
900             makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
901     }
902     else
903     {
904         const auto resourceBufferUsage      = resourceBufferUsageFlags(m_params.resourceType);
905         const auto resourceBufferCreateInfo = makeBufferCreateInfo(bufferSize, resourceBufferUsage);
906         bufferResource                      = BufferWithMemoryPtr(
907             new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible));
908     }
909 
910     Move<VkSampler> sampler;
911     if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
912     {
913         const VkSamplerCreateInfo samplerCreateInfo = {
914             VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
915             nullptr,                               // const void* pNext;
916             0u,                                    // VkSamplerCreateFlags flags;
917             VK_FILTER_NEAREST,                     // VkFilter magFilter;
918             VK_FILTER_NEAREST,                     // VkFilter minFilter;
919             VK_SAMPLER_MIPMAP_MODE_NEAREST,        // VkSamplerMipmapMode mipmapMode;
920             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
921             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
922             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
923             0.0f,                                  // float mipLodBias;
924             VK_FALSE,                              // VkBool32 anisotropyEnable;
925             1.0f,                                  // float maxAnisotropy;
926             VK_FALSE,                              // VkBool32 compareEnable;
927             VK_COMPARE_OP_NEVER,                   // VkCompareOp compareOp;
928             0.0f,                                  // float minLod;
929             0.0f,                                  // float maxLod;
930             VK_BORDER_COLOR_INT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
931             VK_FALSE,                              // VkBool32 unnormalizedCoordinates;
932         };
933         sampler = createSampler(vkd, device, &samplerCreateInfo);
934     }
935 
936     // Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests.
937     BufferWithMemoryPtr hostCoherentBuffer;
938     void *hostCoherentDataPtr = nullptr;
939     if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage))
940     {
941         const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
942         hostCoherentBuffer =
943             BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo,
944                                                      (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
945         hostCoherentDataPtr = hostCoherentBuffer->getAllocation().getHostPtr();
946     }
947 
948     // Descriptor pool.
949     Move<VkDescriptorPool> descriptorPool;
950     {
951         DescriptorPoolBuilder poolBuilder;
952         poolBuilder.addType(descriptorType);
953         descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
954     }
955 
956     // Descriptor set layout.
957     Move<VkDescriptorSetLayout> setLayout;
958     {
959         DescriptorSetLayoutBuilder layoutBuilder;
960         layoutBuilder.addSingleBinding(descriptorType, resourceStages);
961         setLayout = layoutBuilder.build(vkd, device);
962     }
963 
964     // Descriptor set.
965     const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
966 
967     // Update descriptor set.
968     {
969         DescriptorSetUpdateBuilder updateBuilder;
970         const auto location = DescriptorSetUpdateBuilder::Location::binding(0u);
971 
972         switch (descriptorType)
973         {
974         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
975         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
976         {
977             const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize);
978             updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo);
979         }
980         break;
981         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
982         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
983         {
984             auto descriptorImageInfo =
985                 makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout);
986             updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo);
987         }
988         break;
989         default:
990             DE_ASSERT(false);
991             break;
992         }
993 
994         updateBuilder.update(vkd, device);
995     }
996 
997     // Shader modules.
998     Move<VkShaderModule> taskShader;
999     Move<VkShaderModule> meshShader;
1000     Move<VkShaderModule> fragShader;
1001 
1002     const auto &binaries = m_context.getBinaryCollection();
1003 
1004     if (m_params.needsTask())
1005         taskShader = createShaderModule(vkd, device, binaries.get("task"), 0u);
1006     meshShader = createShaderModule(vkd, device, binaries.get("mesh"), 0u);
1007     fragShader = createShaderModule(vkd, device, binaries.get("frag"), 0u);
1008 
1009     using PushConstantStruct = TestParams::PushConstantStruct;
1010 
1011     // Pipeline layout, render pass, framebuffer.
1012     const auto pcSize         = static_cast<uint32_t>(sizeof(PushConstantStruct));
1013     const auto pcRange        = makePushConstantRange(resourceStages, 0u, pcSize);
1014     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get(), &pcRange);
1015     const auto renderPass     = createCustomRenderPass(vkd, device, imageFormat, m_params);
1016     const auto framebuffer =
1017         makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width, imageExtent.height);
1018 
1019     // Pipeline.
1020     std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
1021     std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
1022     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
1023                                                fragShader.get(), renderPass.get(), viewports, scissors);
1024 
1025     // Command pool and buffer.
1026     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
1027     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1028     const auto cmdBuffer    = cmdBufferPtr.get();
1029 
1030     beginCommandBuffer(vkd, cmdBuffer);
1031 
1032     if (m_params.fromStage == Stage::HOST)
1033     {
1034         // Prepare buffer from host when the source stage is the host.
1035         DE_ASSERT(useBufferResource);
1036 
1037         auto &resourceBufferAlloc   = bufferResource->getAllocation();
1038         void *resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();
1039 
1040         deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
1041         flushAlloc(vkd, device, resourceBufferAlloc);
1042     }
1043     else if (m_params.fromStage == Stage::TRANSFER)
1044     {
1045         // Put value in host-coherent buffer and transfer it to the resource buffer or image.
1046         deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
1047         hostToTransferMemoryBarrier(vkd, cmdBuffer);
1048 
1049         if (useBufferResource)
1050         {
1051             const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1052             vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, &copyRegion);
1053         }
1054         else
1055         {
1056             // Move image to the right layout for transfer.
1057             const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1058             if (newLayout != currentLayout)
1059             {
1060                 const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout,
1061                                                                    newLayout, imageResource->get(), colorSRR);
1062                 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
1063                                        0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
1064                 currentLayout = newLayout;
1065             }
1066             const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1067             vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u,
1068                                      &copyRegion);
1069         }
1070     }
1071     else if (m_params.fromStage == Stage::TASK || m_params.fromStage == Stage::MESH)
1072     {
1073         // The image or buffer will be written to from shaders. Images need to be in the right layout.
1074         if (useImageResource)
1075         {
1076             const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
1077             if (newLayout != currentLayout)
1078             {
1079                 const auto preWriteBarrier =
1080                     makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout,
1081                                            newLayout, imageResource->get(), colorSRR);
1082                 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, 0u, 0u, nullptr,
1083                                        0u, nullptr, 1u, &preWriteBarrier);
1084                 currentLayout = newLayout;
1085             }
1086         }
1087     }
1088     else
1089     {
1090         DE_ASSERT(false);
1091     }
1092 
1093     // If the resource is going to be read from shaders, we'll insert the main barrier before running the pipeline.
1094     if (isShaderStage(m_params.toStage) && !needsTwoDrawCalls)
1095     {
1096         if (m_params.barrierType == BarrierType::GENERAL)
1097         {
1098             const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1099             vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u,
1100                                    nullptr);
1101         }
1102         else if (m_params.barrierType == BarrierType::SPECIFIC)
1103         {
1104             if (useBufferResource)
1105             {
1106                 const auto bufferBarrier =
1107                     makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1108                 vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u,
1109                                        nullptr);
1110             }
1111             else
1112             {
1113                 const auto newLayout =
1114                     (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
1115                 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1116                                                                  newLayout, imageResource->get(), colorSRR);
1117 
1118                 vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u,
1119                                        &imageBarrier);
1120                 currentLayout = newLayout;
1121             }
1122         }
1123         else
1124         {
1125             DE_ASSERT(false);
1126         }
1127     }
1128 
1129     if (needsTwoDrawCalls)
1130     {
1131         // Transition image to the general layout before writing to it. When we need two draw calls (because the image will be
1132         // written to and read from a shader stage), the layout will always be general.
1133         if (useImageResource)
1134         {
1135             const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
1136             const auto imageBarrier =
1137                 makeImageMemoryBarrier(0u, writeAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);
1138 
1139             vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, 0u, 0u, nullptr, 0u,
1140                                    nullptr, 1u, &imageBarrier);
1141             currentLayout = newLayout;
1142         }
1143     }
1144 
1145     // Run the pipeline.
1146     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), tcu::UVec4(0u));
1147     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
1148                               &descriptorSet.get(), 0u, nullptr);
1149     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
1150     if (needsTwoDrawCalls)
1151     {
1152         // The first draw call will write to the resource and the second one will read from the resource.
1153         PushConstantStruct pcData;
1154 
1155         pcData.writeVal = 1u;
1156         pcData.readVal  = 0u;
1157 
1158         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1159         vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
1160 
1161         // Use a barrier between both draw calls. The barrier must be generic because:
1162         //    * VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 forbids using buffer barriers inside render passes.
1163         //    * VUID-vkCmdPipelineBarrier-image-04073 forbids using image memory barriers inside render passes with resources that are not attachments.
1164         if (m_params.barrierType == BarrierType::GENERAL)
1165         {
1166             const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1167             vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u,
1168                                    nullptr);
1169         }
1170         else
1171         {
1172             DE_ASSERT(false);
1173         }
1174 
1175         pcData.writeVal = 0u;
1176         pcData.readVal  = 1u;
1177 
1178         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1179         vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
1180     }
1181     else
1182     {
1183         PushConstantStruct pcData;
1184         pcData.writeVal = 1u;
1185         pcData.readVal  = 1u;
1186 
1187         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1188         vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
1189     }
1190     endRenderPass(vkd, cmdBuffer);
1191 
1192     // If the resource was written to from the shaders, insert the main barrier after running the pipeline.
1193     if (isShaderStage(m_params.fromStage) && !needsTwoDrawCalls)
1194     {
1195         if (m_params.barrierType == BarrierType::GENERAL)
1196         {
1197             const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1198             vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u,
1199                                    nullptr);
1200         }
1201         else if (m_params.barrierType == BarrierType::SPECIFIC)
1202         {
1203             if (useBufferResource)
1204             {
1205                 const auto bufferBarrier =
1206                     makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1207                 vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u,
1208                                        nullptr);
1209             }
1210             else
1211             {
1212                 // Note: the image will only be read from shader stages (which is covered in BarrierType::DEPENDENCY) or from the transfer stage.
1213                 const auto newLayout =
1214                     (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1215                 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1216                                                                  newLayout, imageResource->get(), colorSRR);
1217 
1218                 vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u,
1219                                        &imageBarrier);
1220                 currentLayout = newLayout;
1221             }
1222         }
1223         // For subpass dependencies, they have already been included in the render pass.
1224     }
1225 
1226     // Read resource from the destination stage if needed.
1227     if (m_params.toStage == Stage::HOST)
1228     {
1229         // Nothing to do. The test value should be in the resource buffer already, which is host-visible.
1230     }
1231     else if (m_params.toStage == Stage::TRANSFER)
1232     {
1233         // Copy value from resource to host-coherent buffer to be verified later.
1234         if (useBufferResource)
1235         {
1236             const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1237             vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, &copyRegion);
1238         }
1239         else
1240         {
1241             const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1242             vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u,
1243                                      &copyRegion);
1244         }
1245 
1246         transferToHostMemoryBarrier(vkd, cmdBuffer);
1247     }
1248 
1249     // If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
1250     BufferWithMemoryPtr colorVerificationBuffer;
1251     void *colorVerificationDataPtr = nullptr;
1252 
1253     if (valueInColorBuffer(m_params.toStage))
1254     {
1255         const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
1256         colorVerificationBuffer =
1257             BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo,
1258                                                      (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
1259         colorVerificationDataPtr = colorVerificationBuffer->getAllocation().getHostPtr();
1260 
1261         const auto srcAccess = (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1262         const auto dstAccess = VK_ACCESS_TRANSFER_READ_BIT;
1263         const auto colorBarrier =
1264             makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1265                                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
1266         vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1267                                0u, 0u, nullptr, 0u, nullptr, 1u, &colorBarrier);
1268 
1269         const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1270         vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1271                                  colorVerificationBuffer->get(), 1u, &copyRegion);
1272 
1273         transferToHostMemoryBarrier(vkd, cmdBuffer);
1274     }
1275 
1276     endCommandBuffer(vkd, cmdBuffer);
1277     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1278 
1279     // Verify output resources as needed.
1280 
1281     if (valueInAuxiliarDestBuffer(m_params.toStage))
1282     {
1283         uint32_t bufferValue;
1284         deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue));
1285 
1286         if (bufferValue != m_params.testValue)
1287         {
1288             std::ostringstream msg;
1289             msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected "
1290                 << m_params.testValue;
1291             TCU_FAIL(msg.str());
1292         }
1293     }
1294 
1295     if (valueInResourceBuffer(m_params.toStage))
1296     {
1297         auto &resourceBufferAlloc   = bufferResource->getAllocation();
1298         void *resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();
1299         uint32_t bufferValue;
1300 
1301         invalidateAlloc(vkd, device, resourceBufferAlloc);
1302         deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue));
1303 
1304         if (bufferValue != m_params.testValue)
1305         {
1306             std::ostringstream msg;
1307             msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected "
1308                 << m_params.testValue;
1309             TCU_FAIL(msg.str());
1310         }
1311     }
1312 
1313     if (valueInColorBuffer(m_params.toStage))
1314     {
1315         uint32_t bufferValue;
1316         deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue));
1317 
1318         if (bufferValue != m_params.testValue)
1319         {
1320             std::ostringstream msg;
1321             msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected "
1322                 << m_params.testValue;
1323             TCU_FAIL(msg.str());
1324         }
1325     }
1326 
1327     return tcu::TestStatus::pass("Pass");
1328 }
1329 
1330 } // namespace
1331 
createMeshShaderSyncTests(tcu::TestContext & testCtx)1332 tcu::TestCaseGroup *createMeshShaderSyncTests(tcu::TestContext &testCtx)
1333 {
1334     const struct
1335     {
1336         Stage fromStage;
1337         Stage toStage;
1338     } stageCombinations[] = {
1339         // Combinations where the source and destination stages involve mesh shaders.
1340         // Note: this could be tested procedurally.
1341         {Stage::HOST, Stage::TASK},     {Stage::HOST, Stage::MESH}, {Stage::TRANSFER, Stage::TASK},
1342         {Stage::TRANSFER, Stage::MESH}, {Stage::TASK, Stage::MESH}, {Stage::TASK, Stage::FRAG},
1343         {Stage::TASK, Stage::TRANSFER}, {Stage::TASK, Stage::HOST}, {Stage::MESH, Stage::FRAG},
1344         {Stage::MESH, Stage::TRANSFER}, {Stage::MESH, Stage::HOST},
1345     };
1346 
1347     const struct
1348     {
1349         ResourceType resourceType;
1350         const char *name;
1351     } resourceTypes[] = {
1352         {ResourceType::UNIFORM_BUFFER, "uniform_buffer"},
1353         {ResourceType::STORAGE_BUFFER, "storage_buffer"},
1354         {ResourceType::STORAGE_IMAGE, "storage_image"},
1355         {ResourceType::SAMPLED_IMAGE, "sampled_image"},
1356     };
1357 
1358     const struct
1359     {
1360         BarrierType barrierType;
1361         const char *name;
1362     } barrierTypes[] = {
1363         {BarrierType::GENERAL, "memory_barrier"},
1364         {BarrierType::SPECIFIC, "specific_barrier"},
1365     };
1366 
1367     const struct
1368     {
1369         WriteAccess writeAccess;
1370         const char *name;
1371     } writeAccesses[] = {
1372         {WriteAccess::HOST_WRITE, "host_write"},
1373         {WriteAccess::TRANSFER_WRITE, "transfer_write"},
1374         {WriteAccess::SHADER_WRITE, "shader_write"},
1375     };
1376 
1377     const struct
1378     {
1379         ReadAccess readAccess;
1380         const char *name;
1381     } readAccesses[] = {
1382         {ReadAccess::HOST_READ, "host_read"},
1383         {ReadAccess::TRANSFER_READ, "transfer_read"},
1384         {ReadAccess::SHADER_READ, "shader_read"},
1385         {ReadAccess::UNIFORM_READ, "uniform_read"},
1386     };
1387 
1388     uint32_t testValue = 1628510124u;
1389 
1390     GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "synchronization"));
1391 
1392     for (const auto &stageCombination : stageCombinations)
1393     {
1394         const std::string combinationName =
1395             de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
1396         GroupPtr combinationGroup(new tcu::TestCaseGroup(testCtx, combinationName.c_str()));
1397 
1398         for (const auto &resourceCase : resourceTypes)
1399         {
1400             if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
1401                 continue;
1402 
1403             if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
1404                 continue;
1405 
1406             GroupPtr resourceGroup(new tcu::TestCaseGroup(testCtx, resourceCase.name));
1407 
1408             for (const auto &barrierCase : barrierTypes)
1409             {
1410                 // See note above about VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 and VUID-vkCmdPipelineBarrier-image-04073.
1411                 if (readAndWriteFromShaders(stageCombination.fromStage, stageCombination.toStage) &&
1412                     barrierCase.barrierType == BarrierType::SPECIFIC)
1413                     continue;
1414 
1415                 GroupPtr barrierGroup(new tcu::TestCaseGroup(testCtx, barrierCase.name));
1416 
1417                 for (const auto &writeCase : writeAccesses)
1418                     for (const auto &readCase : readAccesses)
1419                     {
1420                         if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
1421                             continue;
1422                         if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
1423                             continue;
1424                         if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
1425                             continue;
1426                         if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
1427                             continue;
1428 
1429                         const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;
1430 
1431                         const TestParams testParams = {
1432                             stageCombination.fromStage, // Stage fromStage;
1433                             stageCombination.toStage,   // Stage toStage;
1434                             resourceCase.resourceType,  // ResourceType resourceType;
1435                             barrierCase.barrierType,    // BarrierType barrierType;
1436                             writeCase.writeAccess,      // WriteAccess writeAccess;
1437                             readCase.readAccess,        // ReadAccess readAccess;
1438                             testValue++,                // uint32_t testValue;
1439                         };
1440 
1441                         barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, testParams));
1442                     }
1443 
1444                 resourceGroup->addChild(barrierGroup.release());
1445             }
1446 
1447             combinationGroup->addChild(resourceGroup.release());
1448         }
1449 
1450         mainGroup->addChild(combinationGroup.release());
1451     }
1452 
1453     return mainGroup.release();
1454 }
1455 
1456 } // namespace MeshShader
1457 } // namespace vkt
1458