xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTestsEXT.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Synchronization Tests for VK_EXT_mesh_shader
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderSyncTestsEXT.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 
29 #include "vkDefs.hpp"
30 #include "vkTypeUtil.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkImageUtil.hpp"
38 
39 #include "deUniquePtr.hpp"
40 
41 #include <iostream>
42 #include <sstream>
43 #include <vector>
44 #include <set>
45 
46 namespace vkt
47 {
48 namespace MeshShader
49 {
50 
51 namespace
52 {
53 
54 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
55 
56 using namespace vk;
57 
// Stages that can take part in a test, either writing or reading the resource.
// The numeric order of the shader stages follows pipeline order; needsTwoPipelines() compares these values.
enum class Stage
{
    HOST     = 0, // Host access (VK_PIPELINE_STAGE_HOST_BIT).
    TRANSFER = 1, // Transfer operations (VK_PIPELINE_STAGE_TRANSFER_BIT).
    TASK     = 2, // Task shader.
    MESH     = 3, // Mesh shader.
    FRAG     = 4, // Fragment shader.
};
67 
operator <<(std::ostream & stream,Stage stage)68 std::ostream &operator<<(std::ostream &stream, Stage stage)
69 {
70     switch (stage)
71     {
72     case Stage::HOST:
73         stream << "host";
74         break;
75     case Stage::TRANSFER:
76         stream << "transfer";
77         break;
78     case Stage::TASK:
79         stream << "task";
80         break;
81     case Stage::MESH:
82         stream << "mesh";
83         break;
84     case Stage::FRAG:
85         stream << "frag";
86         break;
87     default:
88         DE_ASSERT(false);
89         break;
90     }
91 
92     return stream;
93 }
94 
isShaderStage(Stage stage)95 bool isShaderStage(Stage stage)
96 {
97     return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG);
98 }
99 
stageToFlags(Stage stage)100 VkPipelineStageFlags stageToFlags(Stage stage)
101 {
102     switch (stage)
103     {
104     case Stage::HOST:
105         return VK_PIPELINE_STAGE_HOST_BIT;
106     case Stage::TRANSFER:
107         return VK_PIPELINE_STAGE_TRANSFER_BIT;
108     case Stage::TASK:
109         return VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT;
110     case Stage::MESH:
111         return VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT;
112     case Stage::FRAG:
113         return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
114     default:
115         DE_ASSERT(false);
116         break;
117     }
118 
119     // Unreachable.
120     DE_ASSERT(false);
121     return 0u;
122 }
123 
getImageFormat()124 VkFormat getImageFormat()
125 {
126     return VK_FORMAT_R32_UINT;
127 }
128 
getImageExtent()129 VkExtent3D getImageExtent()
130 {
131     return makeExtent3D(1u, 1u, 1u);
132 }
133 
// Kinds of resources the test value can be stored in.
enum class ResourceType
{
    UNIFORM_BUFFER = 0, // Accessed through a uniform buffer descriptor.
    STORAGE_BUFFER = 1, // Accessed through a storage buffer descriptor.
    STORAGE_IMAGE  = 2, // Accessed through a storage image descriptor.
    SAMPLED_IMAGE  = 3, // Accessed through a combined image sampler descriptor.
};
142 
resourceTypeToDescriptor(ResourceType resType)143 VkDescriptorType resourceTypeToDescriptor(ResourceType resType)
144 {
145     switch (resType)
146     {
147     case ResourceType::UNIFORM_BUFFER:
148         return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
149     case ResourceType::STORAGE_BUFFER:
150         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
151     case ResourceType::STORAGE_IMAGE:
152         return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
153     case ResourceType::SAMPLED_IMAGE:
154         return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
155     default:
156         DE_ASSERT(false);
157         break;
158     }
159 
160     // Unreachable.
161     DE_ASSERT(false);
162     return VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
163 }
164 
// How the write and the read are synchronized.
enum class BarrierType
{
    GENERAL    = 0, // General memory barrier.
    SPECIFIC   = 1, // Barrier specific to the resource (presumably a buffer or image memory barrier).
    DEPENDENCY = 2, // Dependency-based synchronization (see TestParams::subpassSelfDependency).
};
172 
// Access types that can be used for the write operation.
enum class WriteAccess
{
    HOST_WRITE     = 0, // VK_ACCESS_HOST_WRITE_BIT
    TRANSFER_WRITE = 1, // VK_ACCESS_TRANSFER_WRITE_BIT
    SHADER_WRITE   = 2, // VK_ACCESS_SHADER_WRITE_BIT
};
180 
writeAccessToFlags(WriteAccess access)181 VkAccessFlags writeAccessToFlags(WriteAccess access)
182 {
183     switch (access)
184     {
185     case WriteAccess::HOST_WRITE:
186         return VK_ACCESS_HOST_WRITE_BIT;
187     case WriteAccess::TRANSFER_WRITE:
188         return VK_ACCESS_TRANSFER_WRITE_BIT;
189     case WriteAccess::SHADER_WRITE:
190         return VK_ACCESS_SHADER_WRITE_BIT;
191     default:
192         DE_ASSERT(false);
193         break;
194     }
195 
196     // Unreachable.
197     DE_ASSERT(false);
198     return 0u;
199 }
200 
// Access types that can be used for the read operation.
enum class ReadAccess
{
    HOST_READ     = 0, // VK_ACCESS_HOST_READ_BIT
    TRANSFER_READ = 1, // VK_ACCESS_TRANSFER_READ_BIT
    SHADER_READ   = 2, // VK_ACCESS_SHADER_READ_BIT
    UNIFORM_READ  = 3, // VK_ACCESS_UNIFORM_READ_BIT
};
209 
readAccessToFlags(ReadAccess access)210 VkAccessFlags readAccessToFlags(ReadAccess access)
211 {
212     switch (access)
213     {
214     case ReadAccess::HOST_READ:
215         return VK_ACCESS_HOST_READ_BIT;
216     case ReadAccess::TRANSFER_READ:
217         return VK_ACCESS_TRANSFER_READ_BIT;
218     case ReadAccess::SHADER_READ:
219         return VK_ACCESS_SHADER_READ_BIT;
220     case ReadAccess::UNIFORM_READ:
221         return VK_ACCESS_UNIFORM_READ_BIT;
222     default:
223         DE_ASSERT(false);
224         break;
225     }
226 
227     // Unreachable.
228     DE_ASSERT(false);
229     return 0u;
230 }
231 
232 // Auxiliary functions to verify certain combinations are possible.
233 
234 // Check if the writing stage can use the specified write access.
canWriteFromStageAsAccess(Stage writeStage,WriteAccess access)235 bool canWriteFromStageAsAccess(Stage writeStage, WriteAccess access)
236 {
237     switch (writeStage)
238     {
239     case Stage::HOST:
240         return (access == WriteAccess::HOST_WRITE);
241     case Stage::TRANSFER:
242         return (access == WriteAccess::TRANSFER_WRITE);
243     case Stage::TASK: // fallthrough
244     case Stage::MESH: // fallthrough
245     case Stage::FRAG:
246         return (access == WriteAccess::SHADER_WRITE);
247     default:
248         DE_ASSERT(false);
249         break;
250     }
251 
252     return false;
253 }
254 
255 // Check if the reading stage can use the specified read access.
canReadFromStageAsAccess(Stage readStage,ReadAccess access)256 bool canReadFromStageAsAccess(Stage readStage, ReadAccess access)
257 {
258     switch (readStage)
259     {
260     case Stage::HOST:
261         return (access == ReadAccess::HOST_READ);
262     case Stage::TRANSFER:
263         return (access == ReadAccess::TRANSFER_READ);
264     case Stage::TASK: // fallthrough
265     case Stage::MESH: // fallthrough
266     case Stage::FRAG:
267         return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ);
268     default:
269         DE_ASSERT(false);
270         break;
271     }
272 
273     return false;
274 }
275 
276 // Check if reading the given resource type is possible with the given type of read access.
canReadResourceAsAccess(ResourceType resType,ReadAccess access)277 bool canReadResourceAsAccess(ResourceType resType, ReadAccess access)
278 {
279     if (access == ReadAccess::UNIFORM_READ)
280         return (resType == ResourceType::UNIFORM_BUFFER);
281     return true;
282 }
283 
284 // Check if writing to the given resource type is possible with the given type of write access.
canWriteResourceAsAccess(ResourceType resType,WriteAccess access)285 bool canWriteResourceAsAccess(ResourceType resType, WriteAccess access)
286 {
287     if (resType == ResourceType::UNIFORM_BUFFER)
288         return (access != WriteAccess::SHADER_WRITE);
289     return true;
290 }
291 
292 // Check if the given stage can write to the given resource type.
canWriteTo(Stage stage,ResourceType resType)293 bool canWriteTo(Stage stage, ResourceType resType)
294 {
295     switch (stage)
296     {
297     case Stage::HOST:
298         return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
299     case Stage::TRANSFER:
300         return true;
301     case Stage::TASK: // fallthrough
302     case Stage::MESH: // fallthrough
303     case Stage::FRAG:
304         return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE);
305     default:
306         DE_ASSERT(false);
307         break;
308     }
309 
310     return false;
311 }
312 
313 // Check if the given stage can read from the given resource type.
canReadFrom(Stage stage,ResourceType resType)314 bool canReadFrom(Stage stage, ResourceType resType)
315 {
316     switch (stage)
317     {
318     case Stage::HOST:
319         return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
320     case Stage::TRANSFER: // fallthrough
321     case Stage::TASK:     // fallthrough
322     case Stage::MESH:     // fallthrough
323     case Stage::FRAG:
324         return true;
325     default:
326         DE_ASSERT(false);
327         break;
328     }
329 
330     return false;
331 }
332 
333 // Will we need to store the test value in an auxiliar buffer to be read?
needsAuxiliarSourceBuffer(Stage fromStage,Stage toStage)334 bool needsAuxiliarSourceBuffer(Stage fromStage, Stage toStage)
335 {
336     DE_UNREF(toStage);
337     return (fromStage == Stage::TRANSFER);
338 }
339 
340 // Will we need to store the read operation result into an auxiliar buffer to be checked?
needsAuxiliarDestBuffer(Stage fromStage,Stage toStage)341 bool needsAuxiliarDestBuffer(Stage fromStage, Stage toStage)
342 {
343     DE_UNREF(fromStage);
344     return (toStage == Stage::TRANSFER);
345 }
346 
347 // Needs any auxiliar buffer for any case?
needsAuxiliarBuffer(Stage fromStage,Stage toStage)348 bool needsAuxiliarBuffer(Stage fromStage, Stage toStage)
349 {
350     return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage));
351 }
352 
353 // Will the final value be stored in the auxiliar destination buffer?
valueInAuxiliarDestBuffer(Stage toStage)354 bool valueInAuxiliarDestBuffer(Stage toStage)
355 {
356     return (toStage == Stage::TRANSFER);
357 }
358 
359 // Will the final value be stored in the resource buffer itself?
valueInResourceBuffer(Stage toStage)360 bool valueInResourceBuffer(Stage toStage)
361 {
362     return (toStage == Stage::HOST);
363 }
364 
365 // Will the final value be stored in the color buffer?
valueInColorBuffer(Stage toStage)366 bool valueInColorBuffer(Stage toStage)
367 {
368     return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage));
369 }
370 
371 // Image usage flags for the image resource.
resourceImageUsageFlags(ResourceType resourceType)372 VkImageUsageFlags resourceImageUsageFlags(ResourceType resourceType)
373 {
374     VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
375 
376     switch (resourceType)
377     {
378     case ResourceType::STORAGE_IMAGE:
379         flags |= VK_IMAGE_USAGE_STORAGE_BIT;
380         break;
381     case ResourceType::SAMPLED_IMAGE:
382         flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
383         break;
384     default:
385         DE_ASSERT(false);
386         break;
387     }
388 
389     return flags;
390 }
391 
392 // Buffer usage flags for the buffer resource.
resourceBufferUsageFlags(ResourceType resourceType)393 VkBufferUsageFlags resourceBufferUsageFlags(ResourceType resourceType)
394 {
395     VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
396 
397     switch (resourceType)
398     {
399     case ResourceType::UNIFORM_BUFFER:
400         flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
401         break;
402     case ResourceType::STORAGE_BUFFER:
403         flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
404         break;
405     default:
406         DE_ASSERT(false);
407         break;
408     }
409 
410     return flags;
411 }
412 
413 // Returns true if both the write and read stages are shader stages.
fromShaderToShader(Stage fromStage,Stage toStage)414 bool fromShaderToShader(Stage fromStage, Stage toStage)
415 {
416     return (isShaderStage(fromStage) && isShaderStage(toStage));
417 }
418 
419 // Supposing we'll use two subpasses, decide the stages of a subpass based on the mandatory stages and the one we're interested in.
subpassStages(Stage wantedStage,bool lastSubpass)420 std::vector<Stage> subpassStages(Stage wantedStage, bool lastSubpass)
421 {
422     std::set<Stage> stages;
423     stages.insert(wantedStage);
424     stages.insert(Stage::MESH); // This one is mandatory.
425     if (lastSubpass)
426         stages.insert(Stage::FRAG); // In the last subpass we always need a fragment shader (passthrough).
427     return std::vector<Stage>(begin(stages), end(stages));
428 }
429 
430 // Is the task shader in the list?
hasTask(const std::vector<Stage> & stages)431 bool hasTask(const std::vector<Stage> &stages)
432 {
433     return de::contains(begin(stages), end(stages), Stage::TASK);
434 }
435 
436 // Is the frag shader in the list?
hasFrag(const std::vector<Stage> & stages)437 bool hasFrag(const std::vector<Stage> &stages)
438 {
439     return de::contains(begin(stages), end(stages), Stage::FRAG);
440 }
441 
442 struct TestParams
443 {
444     Stage fromStage;
445     Stage toStage;
446     ResourceType resourceType;
447     BarrierType barrierType;
448     WriteAccess writeAccess;
449     ReadAccess readAccess;
450     uint32_t testValue;
451 
452 protected:
readsOrWritesInvkt::MeshShader::__anon0d35f4610111::TestParams453     bool readsOrWritesIn(Stage stage) const
454     {
455         DE_ASSERT(fromStage != toStage);
456         return (fromStage == stage || toStage == stage);
457     }
458 
459 public:
needsTaskvkt::MeshShader::__anon0d35f4610111::TestParams460     bool needsTask() const
461     {
462         return readsOrWritesIn(Stage::TASK);
463     }
464 
readsOrWritesInMeshvkt::MeshShader::__anon0d35f4610111::TestParams465     bool readsOrWritesInMesh() const
466     {
467         return readsOrWritesIn(Stage::MESH);
468     }
469 
getResourceDeclvkt::MeshShader::__anon0d35f4610111::TestParams470     std::string getResourceDecl() const
471     {
472         const auto imgFormat     = ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : "");
473         const auto storagePrefix = ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly ");
474         std::ostringstream decl;
475 
476         decl << "layout (set=0, binding=0" << imgFormat << ") ";
477         switch (resourceType)
478         {
479         case ResourceType::UNIFORM_BUFFER:
480             decl << "uniform UniformBuffer { uint value; } ub;";
481             break;
482         case ResourceType::STORAGE_BUFFER:
483             decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;";
484             break;
485         case ResourceType::STORAGE_IMAGE:
486             decl << storagePrefix << "uniform uimage2D si;";
487             break;
488         case ResourceType::SAMPLED_IMAGE:
489             decl << "uniform usampler2D sampled;";
490             break;
491         default:
492             DE_ASSERT(false);
493             break;
494         }
495 
496         decl << "\n";
497         return decl.str();
498     }
499 
500     struct PushConstantStruct
501     {
502         uint32_t writeVal;
503         uint32_t readVal;
504     };
505 
506     // Get declaration for the "pc" push constant block. Must match the structure above.
getPushConstantDeclvkt::MeshShader::__anon0d35f4610111::TestParams507     std::string getPushConstantDecl() const
508     {
509         std::ostringstream pc;
510         pc << "layout (push_constant, std430) uniform PushConstantBlock {\n"
511            << "    uint writeVal;\n"
512            << "    uint readVal;\n"
513            << "} pc;\n";
514         return pc.str();
515     }
516 
getReadStatementvkt::MeshShader::__anon0d35f4610111::TestParams517     std::string getReadStatement(const std::string &outName) const
518     {
519         std::ostringstream statement;
520         statement << "    if (pc.readVal > 0u) { " << outName << " = ";
521 
522         switch (resourceType)
523         {
524         case ResourceType::UNIFORM_BUFFER:
525             statement << "ub.value";
526             break;
527         case ResourceType::STORAGE_BUFFER:
528             statement << "sb.value";
529             break;
530         case ResourceType::STORAGE_IMAGE:
531             statement << "imageLoad(si, ivec2(0, 0)).x";
532             break;
533         case ResourceType::SAMPLED_IMAGE:
534             statement << "texture(sampled, vec2(0.5, 0.5)).x";
535             break;
536         default:
537             DE_ASSERT(false);
538             break;
539         }
540 
541         statement << "; }\n";
542         return statement.str();
543     }
544 
getWriteStatementvkt::MeshShader::__anon0d35f4610111::TestParams545     std::string getWriteStatement(const std::string &valueName) const
546     {
547         std::ostringstream statement;
548         statement << "    if (pc.writeVal > 0u) { ";
549 
550         switch (resourceType)
551         {
552         case ResourceType::STORAGE_BUFFER:
553             statement << "sb.value = " << valueName;
554             break;
555         case ResourceType::STORAGE_IMAGE:
556             statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))";
557             break;
558         case ResourceType::UNIFORM_BUFFER: // fallthrough
559         case ResourceType::SAMPLED_IMAGE:  // fallthrough
560         default:
561             DE_ASSERT(false);
562             break;
563         }
564 
565         statement << "; }\n";
566         return statement.str();
567     }
568 
getResourceShaderStagesvkt::MeshShader::__anon0d35f4610111::TestParams569     VkShaderStageFlags getResourceShaderStages() const
570     {
571         VkShaderStageFlags flags = 0u;
572 
573         if (fromStage == Stage::TASK || toStage == Stage::TASK)
574             flags |= VK_SHADER_STAGE_TASK_BIT_EXT;
575         if (fromStage == Stage::MESH || toStage == Stage::MESH)
576             flags |= VK_SHADER_STAGE_MESH_BIT_EXT;
577         if (fromStage == Stage::FRAG || toStage == Stage::FRAG)
578             flags |= VK_SHADER_STAGE_FRAGMENT_BIT;
579 
580         // We assume at least something must be done either on the task or mesh shaders for the tests to be interesting.
581         DE_ASSERT((flags & (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT)) != 0u);
582         return flags;
583     }
584 
585     // We'll prefer to keep the image in the general layout if it will be written to from a shader stage or if the barrier is going to be a generic memory barrier.
preferGeneralLayoutvkt::MeshShader::__anon0d35f4610111::TestParams586     bool preferGeneralLayout() const
587     {
588         return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) ||
589                 (resourceType == ResourceType::STORAGE_IMAGE));
590     }
591 
592     // We need two pipelines if both the writing and reading stage are shaders, and either:
593     // - The writing stage comes after the reading stage in the pipeline.
594     // - The barrier to use is not a dependency.
needsTwoPipelinesvkt::MeshShader::__anon0d35f4610111::TestParams595     bool needsTwoPipelines() const
596     {
597         return (fromShaderToShader(fromStage, toStage) &&
598                 (static_cast<int>(fromStage) >= static_cast<int>(toStage) || barrierType != BarrierType::DEPENDENCY));
599     }
600 
601     // We need to use generic barriers when using subpass self-dependencies (single subpass and pipeline).
602     // Note: barrierType == BarrierType::DEPENDENCY is technically redundant with !needsTwoPipelines().
subpassSelfDependencyvkt::MeshShader::__anon0d35f4610111::TestParams603     bool subpassSelfDependency() const
604     {
605         return (fromShaderToShader(fromStage, toStage) && barrierType == BarrierType::DEPENDENCY &&
606                 !needsTwoPipelines());
607     }
608 };
609 
610 class MeshShaderSyncCase : public vkt::TestCase
611 {
612 public:
MeshShaderSyncCase(tcu::TestContext & testCtx,const std::string & name,const TestParams & params)613     MeshShaderSyncCase(tcu::TestContext &testCtx, const std::string &name, const TestParams &params)
614         : vkt::TestCase(testCtx, name)
615         , m_params(params)
616     {
617     }
618 
~MeshShaderSyncCase(void)619     virtual ~MeshShaderSyncCase(void)
620     {
621     }
622 
623     void checkSupport(Context &context) const override;
624     void initPrograms(vk::SourceCollections &programCollection) const override;
625     TestInstance *createInstance(Context &context) const override;
626 
627 protected:
628     TestParams m_params;
629 };
630 
631 class MeshShaderSyncInstance : public vkt::TestInstance
632 {
633 public:
MeshShaderSyncInstance(Context & context,const TestParams & params)634     MeshShaderSyncInstance(Context &context, const TestParams &params) : vkt::TestInstance(context), m_params(params)
635     {
636     }
~MeshShaderSyncInstance(void)637     virtual ~MeshShaderSyncInstance(void)
638     {
639     }
640 
641     tcu::TestStatus iterate(void) override;
642 
643 protected:
644     TestParams m_params;
645 };
646 
checkSupport(Context & context) const647 void MeshShaderSyncCase::checkSupport(Context &context) const
648 {
649     checkTaskMeshShaderSupportEXT(context, m_params.needsTask(), true);
650 
651     if (m_params.writeAccess == WriteAccess::SHADER_WRITE)
652     {
653         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
654     }
655 }
656 
initPrograms(vk::SourceCollections & programCollection) const657 void MeshShaderSyncCase::initPrograms(vk::SourceCollections &programCollection) const
658 {
659     const auto buildOptions    = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
660     const bool needsTaskShader = m_params.needsTask();
661     const auto valueStr        = de::toString(m_params.testValue);
662     const auto resourceDecl    = m_params.getResourceDecl();
663     const auto pcDecl          = m_params.getPushConstantDecl();
664     const std::string tdDecl   = "struct TaskData { uint value; }; taskPayloadSharedEXT TaskData td;\n";
665 
666     if (needsTaskShader)
667     {
668         std::ostringstream task;
669         task << "#version 450\n"
670              << "#extension GL_EXT_mesh_shader : enable\n"
671              << "\n"
672              << "layout(local_size_x=1) in;\n"
673              << "\n"
674              << tdDecl << "\n"
675              << resourceDecl << pcDecl << "\n"
676              << "void main ()\n"
677              << "{\n"
678              << "    td.value = 0u;\n"
679              << ((m_params.fromStage == Stage::TASK) ? m_params.getWriteStatement(valueStr) : "")
680              << ((m_params.toStage == Stage::TASK) ? m_params.getReadStatement("td.value") : "")
681              << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
682              << "}\n";
683         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
684     }
685 
686     {
687         // In the mesh-to-task case, we need non-passthrough mesh and task shaders but the mesh shader doesn't have a previous task shader.
688         // In the task-to-mesh case, the second pipeline will have the main mesh shader but no previous task shader either.
689         const bool prevTaskInMainMesh =
690             (needsTaskShader && !(m_params.fromStage == Stage::MESH && m_params.toStage == Stage::TASK) &&
691              !(m_params.fromStage == Stage::TASK && m_params.toStage == Stage::MESH));
692         const bool rwInMeshStage = m_params.readsOrWritesInMesh();
693 
694         std::ostringstream mesh;
695         mesh << "#version 450\n"
696              << "#extension GL_EXT_mesh_shader : enable\n"
697              << "\n"
698              << "layout(local_size_x=1) in;\n"
699              << "layout(triangles) out;\n"
700              << "layout(max_vertices=3, max_primitives=1) out;\n"
701              << "\n"
702              << (prevTaskInMainMesh ? tdDecl : "") << "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n"
703              << "\n"
704              << (rwInMeshStage ? resourceDecl : "") << (rwInMeshStage ? pcDecl : "") << "\n"
705              << "void main ()\n"
706              << "{\n"
707              << "    SetMeshOutputsEXT(3u, 1u);\n"
708              << (prevTaskInMainMesh ? "    primitiveValue[0] = td.value;\n" : "")
709              << ((m_params.fromStage == Stage::MESH) ? m_params.getWriteStatement(valueStr) : "")
710              << ((m_params.toStage == Stage::MESH) ? m_params.getReadStatement("primitiveValue[0]") : "") << "\n"
711              << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
712              << "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
713              << "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
714              << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
715              << "}\n";
716         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
717     }
718 
719     {
720         const bool readFromFrag  = (m_params.toStage == Stage::FRAG);
721         const bool writeFromFrag = (m_params.fromStage == Stage::FRAG);
722         const bool rwInFragStage = (readFromFrag || writeFromFrag);
723         std::ostringstream frag;
724 
725         frag << "#version 450\n"
726              << "#extension GL_EXT_mesh_shader : enable\n"
727              << "\n"
728              << "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n"
729              << "layout (location=0) out uvec4 outColor;\n"
730              << "\n"
731              << (rwInFragStage ? resourceDecl : "") << (rwInFragStage ? pcDecl : "") << "\n"
732              << "void main ()\n"
733              << "{\n"
734              << "    outColor = uvec4(primitiveValue, 0, 0, 0);\n"
735              << "    uint readVal = 0u;\n"
736              << (readFromFrag ? m_params.getReadStatement("readVal") : "")
737              << (readFromFrag ? "    outColor = uvec4(readVal, 0, 0, 0);\n" : "")
738              << (writeFromFrag ? m_params.getWriteStatement(valueStr) : "") << "}\n";
739         programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
740     }
741 
742     // Passthrough shaders.
743     {
744         const std::string task = "#version 450\n"
745                                  "#extension GL_EXT_mesh_shader : enable\n"
746                                  "\n"
747                                  "layout(local_size_x=1) in;\n"
748                                  "\n" +
749                                  tdDecl +
750                                  "\n"
751                                  "void main ()\n"
752                                  "{\n"
753                                  "    td.value = 0u;\n"
754                                  "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
755                                  "}\n";
756         programCollection.glslSources.add("taskPassthrough") << glu::TaskSource(task) << buildOptions;
757 
758         const std::string frag = "#version 450\n"
759                                  "#extension GL_EXT_mesh_shader : enable\n"
760                                  "\n"
761                                  "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n"
762                                  "layout (location=0) out uvec4 outColor;\n"
763                                  "\n"
764                                  "void main ()\n"
765                                  "{\n"
766                                  "    outColor = uvec4(primitiveValue, 0, 0, 0);\n"
767                                  "}\n";
768         programCollection.glslSources.add("fragPassthrough") << glu::FragmentSource(frag) << buildOptions;
769 
770         for (int i = 0; i < 2; ++i)
771         {
772             const bool prevTask          = (i > 0);
773             const std::string nameSuffix = (prevTask ? "WithTask" : "");
774             const std::string mesh       = "#version 450\n"
775                                            "#extension GL_EXT_mesh_shader : enable\n"
776                                            "\n"
777                                            "layout(local_size_x=1) in;\n"
778                                            "layout(triangles) out;\n"
779                                            "layout(max_vertices=3, max_primitives=1) out;\n"
780                                            "\n" +
781                                      (prevTask ? tdDecl : "") +
782                                      "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n"
783                                      "\n"
784                                      "void main ()\n"
785                                      "{\n"
786                                      "    SetMeshOutputsEXT(3u, 1u);\n"
787                                      "    " +
788                                      (prevTask ? "primitiveValue[0] = td.value;" : "primitiveValue[0] = 0u;") +
789                                      "\n"
790                                      "\n"
791                                      "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
792                                      "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
793                                      "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
794                                      "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
795                                      "}\n";
796             programCollection.glslSources.add("meshPassthrough" + nameSuffix) << glu::MeshSource(mesh) << buildOptions;
797         }
798     }
799 }
800 
createInstance(Context & context) const801 TestInstance *MeshShaderSyncCase::createInstance(Context &context) const
802 {
803     return new MeshShaderSyncInstance(context, m_params);
804 }
805 
806 // General description behind these tests.
807 //
808 //    From                To
809 // ==============================
810 //    HOST                TASK            Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer.
811 //    HOST                MESH            Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer.
812 //    TRANSFER            TASK            Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer.
813 //    TRANSFER            MESH            Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer.
814 //    TASK                MESH            Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer.
815 //    TASK                FRAG            Same write procedure and restrictions. Read from frag into outColor. Verify color buffer.
816 //    TASK                TRANSFER        Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer.
817 //    TASK                HOST            Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly.
818 //    MESH                FRAG            Same as task to frag but the write instructions need to be in the mesh shader.
819 //    MESH                TRANSFER        Same as task to transfer but the write instructions need to be in the mesh shader.
820 //    MESH                HOST            Same as task to host but the write instructions need to be in the mesh shader.
821 //
822 //    The following cases require two pipelines
823 // =========================================
824 //    MESH                TASK            Write value to buffer or image from mesh shader. Only valid for storage buffers and images. Read from task into td.value. Verify color buffer.
825 //        Sequence: mesh, task, mesh*, frag*.
826 //    FRAG                TASK            Same as mesh to task, but writing from the first fragment shader.
827 //        Sequence: mesh*, frag, task, mesh*, frag*.
828 //    FRAG                MESH            Similar to frag to task, but reading from mesh into primitiveValue[0]. Verify color buffer after second fragment shader.
829 //        Sequence: mesh*, frag, mesh, frag*.
830 //
831 
832 // Create one or two render passes with the right dependencies depending on the test parameters.
createCustomRenderPasses(const DeviceInterface & vkd,VkDevice device,VkFormat colorFormat,const TestParams & params)833 std::vector<Move<VkRenderPass>> createCustomRenderPasses(const DeviceInterface &vkd, VkDevice device,
834                                                          VkFormat colorFormat, const TestParams &params)
835 {
836     std::vector<Move<VkRenderPass>> renderPasses;
837     const bool useDependencies     = (params.barrierType == BarrierType::DEPENDENCY);
838     const bool twoPipelines        = params.needsTwoPipelines();
839     const bool twoSubpasses        = (twoPipelines && useDependencies);
840     const uint32_t pipelineCount   = (twoPipelines ? 2u : 1u);
841     const uint32_t subpassCount    = (twoSubpasses ? 2u : 1u);
842     const uint32_t renderPassCount = ((twoPipelines && !twoSubpasses) ? 2u : 1u);
843 
844     const std::vector<VkAttachmentDescription> attachmentDescs = {{
845         0u,                                       // VkAttachmentDescriptionFlags flags;
846         colorFormat,                              // VkFormat format;
847         VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits samples;
848         VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp loadOp;
849         VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp storeOp;
850         VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp stencilLoadOp;
851         VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp stencilStoreOp;
852         VK_IMAGE_LAYOUT_UNDEFINED,                // VkImageLayout initialLayout;
853         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
854     }};
855 
856     const std::vector<VkAttachmentReference> attachmentRefs = {{0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}};
857 
858     // One or two identical subpasses.
859     const VkSubpassDescription subpassDesc = {
860         0u,                                           // VkSubpassDescriptionFlags flags;
861         VK_PIPELINE_BIND_POINT_GRAPHICS,              // VkPipelineBindPoint pipelineBindPoint;
862         0u,                                           // uint32_t inputAttachmentCount;
863         nullptr,                                      // const VkAttachmentReference* pInputAttachments;
864         static_cast<uint32_t>(attachmentRefs.size()), // uint32_t colorAttachmentCount;
865         de::dataOrNull(attachmentRefs),               // const VkAttachmentReference* pColorAttachments;
866         nullptr,                                      // const VkAttachmentReference* pResolveAttachments;
867         nullptr,                                      // const VkAttachmentReference* pDepthStencilAttachment;
868         0u,                                           // uint32_t preserveAttachmentCount;
869         nullptr,                                      // const uint32_t* pPreserveAttachments;
870     };
871 
872     const std::vector<VkSubpassDescription> subpassDescs(subpassCount, subpassDesc);
873 
874     std::vector<VkSubpassDependency> dependencies;
875     if (fromShaderToShader(params.fromStage, params.toStage) && useDependencies)
876     {
877         const VkSubpassDependency dependency = {
878             0u,                                     // uint32_t srcSubpass;
879             pipelineCount - 1u,                     // uint32_t dstSubpass;
880             stageToFlags(params.fromStage),         // VkPipelineStageFlags srcStageMask;
881             stageToFlags(params.toStage),           // VkPipelineStageFlags dstStageMask;
882             writeAccessToFlags(params.writeAccess), // VkAccessFlags srcAccessMask;
883             readAccessToFlags(params.readAccess),   // VkAccessFlags dstAccessMask;
884             0u,                                     // VkDependencyFlags dependencyFlags;
885         };
886         dependencies.push_back(dependency);
887     }
888 
889     const VkRenderPassCreateInfo createInfo = {
890         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,     // VkStructureType sType;
891         nullptr,                                       // const void* pNext;
892         0u,                                            // VkRenderPassCreateFlags flags;
893         static_cast<uint32_t>(attachmentDescs.size()), // uint32_t attachmentCount;
894         de::dataOrNull(attachmentDescs),               // const VkAttachmentDescription* pAttachments;
895         static_cast<uint32_t>(subpassDescs.size()),    // uint32_t subpassCount;
896         de::dataOrNull(subpassDescs),                  // const VkSubpassDescription* pSubpasses;
897         static_cast<uint32_t>(dependencies.size()),    // uint32_t dependencyCount;
898         de::dataOrNull(dependencies),                  // const VkSubpassDependency* pDependencies;
899     };
900 
901     for (uint32_t renderPassIdx = 0u; renderPassIdx < renderPassCount; ++renderPassIdx)
902         renderPasses.push_back(createRenderPass(vkd, device, &createInfo));
903 
904     return renderPasses;
905 }
906 
hostToTransferMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)907 void hostToTransferMemoryBarrier(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
908 {
909     const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
910     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &barrier);
911 }
912 
transferToHostMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)913 void transferToHostMemoryBarrier(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
914 {
915     const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
916     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &barrier);
917 }
918 
iterate(void)919 tcu::TestStatus MeshShaderSyncInstance::iterate(void)
920 {
921     const auto &vkd       = m_context.getDeviceInterface();
922     const auto device     = m_context.getDevice();
923     auto &alloc           = m_context.getDefaultAllocator();
924     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
925     const auto queue      = m_context.getUniversalQueue();
926 
927     const auto imageFormat         = getImageFormat();
928     const auto imageExtent         = getImageExtent();
929     const auto colorBufferUsage    = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
930     const auto colorSRR            = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
931     const auto colorSRL            = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
932     const auto bufferSize          = static_cast<VkDeviceSize>(sizeof(m_params.testValue));
933     const auto descriptorType      = resourceTypeToDescriptor(m_params.resourceType);
934     const auto resourceStages      = m_params.getResourceShaderStages();
935     const auto auxiliarBufferUsage = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
936     const auto useGeneralLayout    = m_params.preferGeneralLayout();
937 
938     const auto writeAccessFlags = writeAccessToFlags(m_params.writeAccess);
939     const auto readAccessFlags  = readAccessToFlags(m_params.readAccess);
940     const auto fromStageFlags   = stageToFlags(m_params.fromStage);
941     const auto toStageFlags     = stageToFlags(m_params.toStage);
942 
943     // Prepare color buffer.
944     const VkImageCreateInfo colorBufferCreateInfo = {
945         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
946         nullptr,                             // const void* pNext;
947         0u,                                  // VkImageCreateFlags flags;
948         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
949         imageFormat,                         // VkFormat format;
950         imageExtent,                         // VkExtent3D extent;
951         1u,                                  // uint32_t mipLevels;
952         1u,                                  // uint32_t arrayLayers;
953         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
954         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
955         colorBufferUsage,                    // VkImageUsageFlags usage;
956         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
957         0u,                                  // uint32_t queueFamilyIndexCount;
958         nullptr,                             // const uint32_t* pQueueFamilyIndices;
959         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
960     };
961     ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
962     const auto colorBufferView =
963         makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
964 
965     // Main resource.
966     using ImageWithMemoryPtr  = de::MovePtr<ImageWithMemory>;
967     using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
968 
969     ImageWithMemoryPtr imageResource;
970     Move<VkImageView> imageResourceView;
971     VkImageLayout imageDescriptorLayout =
972         (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
973     VkImageLayout currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
974     BufferWithMemoryPtr bufferResource;
975 
976     bool useImageResource  = false;
977     bool useBufferResource = false;
978 
979     switch (m_params.resourceType)
980     {
981     case ResourceType::UNIFORM_BUFFER:
982     case ResourceType::STORAGE_BUFFER:
983         useBufferResource = true;
984         break;
985     case ResourceType::STORAGE_IMAGE:
986     case ResourceType::SAMPLED_IMAGE:
987         useImageResource = true;
988         break;
989     default:
990         DE_ASSERT(false);
991         break;
992     }
993 
994     // One resource needed.
995     DE_ASSERT(useImageResource != useBufferResource);
996 
997     if (useImageResource)
998     {
999         const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType);
1000 
1001         const VkImageCreateInfo resourceCreateInfo = {
1002             VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1003             nullptr,                             // const void* pNext;
1004             0u,                                  // VkImageCreateFlags flags;
1005             VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
1006             imageFormat,                         // VkFormat format;
1007             imageExtent,                         // VkExtent3D extent;
1008             1u,                                  // uint32_t mipLevels;
1009             1u,                                  // uint32_t arrayLayers;
1010             VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
1011             VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
1012             resourceImageUsage,                  // VkImageUsageFlags usage;
1013             VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1014             0u,                                  // uint32_t queueFamilyIndexCount;
1015             nullptr,                             // const uint32_t* pQueueFamilyIndices;
1016             VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
1017         };
1018         imageResource =
1019             ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any));
1020         imageResourceView =
1021             makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
1022     }
1023     else
1024     {
1025         const auto resourceBufferUsage      = resourceBufferUsageFlags(m_params.resourceType);
1026         const auto resourceBufferCreateInfo = makeBufferCreateInfo(bufferSize, resourceBufferUsage);
1027         bufferResource                      = BufferWithMemoryPtr(
1028             new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible));
1029     }
1030 
1031     Move<VkSampler> sampler;
1032     if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
1033     {
1034         const VkSamplerCreateInfo samplerCreateInfo = {
1035             VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
1036             nullptr,                               // const void* pNext;
1037             0u,                                    // VkSamplerCreateFlags flags;
1038             VK_FILTER_NEAREST,                     // VkFilter magFilter;
1039             VK_FILTER_NEAREST,                     // VkFilter minFilter;
1040             VK_SAMPLER_MIPMAP_MODE_NEAREST,        // VkSamplerMipmapMode mipmapMode;
1041             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
1042             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
1043             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
1044             0.0f,                                  // float mipLodBias;
1045             VK_FALSE,                              // VkBool32 anisotropyEnable;
1046             1.0f,                                  // float maxAnisotropy;
1047             VK_FALSE,                              // VkBool32 compareEnable;
1048             VK_COMPARE_OP_NEVER,                   // VkCompareOp compareOp;
1049             0.0f,                                  // float minLod;
1050             0.0f,                                  // float maxLod;
1051             VK_BORDER_COLOR_INT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
1052             VK_FALSE,                              // VkBool32 unnormalizedCoordinates;
1053         };
1054         sampler = createSampler(vkd, device, &samplerCreateInfo);
1055     }
1056 
1057     // Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests.
1058     BufferWithMemoryPtr hostCoherentBuffer;
1059     void *hostCoherentDataPtr = nullptr;
1060     if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage))
1061     {
1062         const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
1063         hostCoherentBuffer =
1064             BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo,
1065                                                      (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
1066         hostCoherentDataPtr = hostCoherentBuffer->getAllocation().getHostPtr();
1067     }
1068 
1069     // Descriptor pool.
1070     Move<VkDescriptorPool> descriptorPool;
1071     {
1072         DescriptorPoolBuilder poolBuilder;
1073         poolBuilder.addType(descriptorType);
1074         descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1075     }
1076 
1077     // Descriptor set layout.
1078     Move<VkDescriptorSetLayout> setLayout;
1079     {
1080         DescriptorSetLayoutBuilder layoutBuilder;
1081         layoutBuilder.addSingleBinding(descriptorType, resourceStages);
1082         setLayout = layoutBuilder.build(vkd, device);
1083     }
1084 
1085     // Descriptor set.
1086     const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1087 
1088     // Update descriptor set.
1089     {
1090         DescriptorSetUpdateBuilder updateBuilder;
1091         const auto location = DescriptorSetUpdateBuilder::Location::binding(0u);
1092 
1093         switch (descriptorType)
1094         {
1095         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1096         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1097         {
1098             const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize);
1099             updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo);
1100         }
1101         break;
1102         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1103         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1104         {
1105             auto descriptorImageInfo =
1106                 makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout);
1107             updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo);
1108         }
1109         break;
1110         default:
1111             DE_ASSERT(false);
1112             break;
1113         }
1114 
1115         updateBuilder.update(vkd, device);
1116     }
1117 
1118     // Render passes and framebuffers.
1119     const auto renderPasses    = createCustomRenderPasses(vkd, device, imageFormat, m_params);
1120     const bool multiRenderPass = (renderPasses.size() > 1u);
1121     DE_ASSERT(renderPasses.size() > 0u);
1122 
1123     std::vector<Move<VkFramebuffer>> framebuffers;
1124     framebuffers.reserve(renderPasses.size());
1125 
1126     for (const auto &renderPass : renderPasses)
1127         framebuffers.push_back(makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width,
1128                                                imageExtent.height));
1129 
1130     // Viewports and scissors.
1131     std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
1132     std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
1133 
1134     using PushConstantStruct = TestParams::PushConstantStruct;
1135 
1136     // Pipeline layout.
1137     const auto pcSize         = static_cast<uint32_t>(sizeof(PushConstantStruct));
1138     const auto pcRange        = makePushConstantRange(resourceStages, 0u, pcSize);
1139     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get(), &pcRange);
1140 
1141     // Shader modules, pipelines and pipeline layouts.
1142     const auto twoPipelines = m_params.needsTwoPipelines();
1143     const auto selfDeps     = m_params.subpassSelfDependency();
1144 
1145     // Both at the same time does not make sense.
1146     DE_ASSERT(!(twoPipelines && selfDeps));
1147 
1148     const auto pipelineCount  = (twoPipelines ? 2u : 1u);
1149     const auto drawCount      = (selfDeps ? 2u : 1u);
1150     const auto iterationCount = std::max(pipelineCount, drawCount);
1151 
1152     std::vector<Move<VkPipeline>> pipelines;
1153     pipelines.reserve(pipelineCount);
1154 
1155     // Shader modules.
1156     const auto &binaries = m_context.getBinaryCollection();
1157 
1158     Move<VkShaderModule> taskShader;
1159     if (m_params.needsTask())
1160         taskShader = createShaderModule(vkd, device, binaries.get("task"));
1161 
1162     const auto meshShader                    = createShaderModule(vkd, device, binaries.get("mesh"));
1163     const auto fragShader                    = createShaderModule(vkd, device, binaries.get("frag"));
1164     const auto taskPassthroughShader         = createShaderModule(vkd, device, binaries.get("taskPassthrough"));
1165     const auto fragPassthroughShader         = createShaderModule(vkd, device, binaries.get("fragPassthrough"));
1166     const auto meshPassthroughShader         = createShaderModule(vkd, device, binaries.get("meshPassthrough"));
1167     const auto meshPassthroughWithTaskShader = createShaderModule(vkd, device, binaries.get("meshPassthroughWithTask"));
1168 
1169     if (pipelineCount == 1u)
1170     {
1171         // Pipeline.
1172         pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
1173                                                  fragShader.get(), renderPasses.at(0u).get(), viewports, scissors));
1174     }
1175     else if (pipelineCount == 2u)
1176     {
1177         // Mandatory stages in each pipeline: the first pipeline will contain the "from" stage (write) and the second one the "to" stage (read).
1178         const std::vector<Stage> mandatoryStages{m_params.fromStage, m_params.toStage};
1179 
1180         // One pipeline per mandatory stage.
1181         for (uint32_t pipelineIdx = 0u; pipelineIdx < pipelineCount; ++pipelineIdx)
1182         {
1183             const auto &stage = mandatoryStages.at(pipelineIdx);
1184 
1185             VkShaderModule taskModule = DE_NULL;
1186             VkShaderModule meshModule = DE_NULL;
1187             VkShaderModule fragModule = DE_NULL;
1188 
1189             const bool lastSubpass    = (pipelineIdx == pipelineCount - 1u);
1190             const auto pipelineStages = subpassStages(stage, lastSubpass);
1191             const bool hasTaskShader  = hasTask(pipelineStages);
1192             const bool hasFragShader  = hasFrag(pipelineStages);
1193 
1194             // Decide which shaders to use for this one.
1195             if (hasTaskShader)
1196                 taskModule = ((stage == Stage::TASK) ? taskShader.get() : taskPassthroughShader.get());
1197 
1198             if (stage == Stage::MESH)
1199                 meshModule = meshShader.get();
1200             else
1201             {
1202                 meshModule = (hasTaskShader ? meshPassthroughWithTaskShader.get() : meshPassthroughShader.get());
1203             }
1204 
1205             if (hasFragShader)
1206                 fragModule = ((stage == Stage::FRAG) ? fragShader.get() : fragPassthroughShader.get());
1207 
1208             // Create pipeline. When using multiple render passes, the subpass is always zero. When using a single render pass, each pipeline is prepared for one subpass.
1209             const auto renderPass = (multiRenderPass ? renderPasses.at(pipelineIdx).get() : renderPasses[0].get());
1210             const auto subpass    = (multiRenderPass ? 0u : pipelineIdx);
1211 
1212             pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskModule, meshModule,
1213                                                      fragModule, renderPass, viewports, scissors, subpass));
1214         }
1215     }
1216     else
1217     {
1218         DE_ASSERT(false);
1219     }
1220 
1221     // Command pool and buffer.
1222     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
1223     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1224     const auto cmdBuffer    = cmdBufferPtr.get();
1225 
1226     beginCommandBuffer(vkd, cmdBuffer);
1227 
1228     if (m_params.fromStage == Stage::HOST)
1229     {
1230         // Prepare buffer from host when the source stage is the host.
1231         DE_ASSERT(useBufferResource);
1232 
1233         auto &resourceBufferAlloc   = bufferResource->getAllocation();
1234         void *resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();
1235 
1236         deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
1237         flushAlloc(vkd, device, resourceBufferAlloc);
1238     }
1239     else if (m_params.fromStage == Stage::TRANSFER)
1240     {
1241         // Put value in host-coherent buffer and transfer it to the resource buffer or image.
1242         deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
1243         hostToTransferMemoryBarrier(vkd, cmdBuffer);
1244 
1245         if (useBufferResource)
1246         {
1247             const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1248             vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, &copyRegion);
1249         }
1250         else
1251         {
1252             // Move image to the right layout for transfer.
1253             const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1254             if (newLayout != currentLayout)
1255             {
1256                 const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout,
1257                                                                    newLayout, imageResource->get(), colorSRR);
1258                 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1259                                               VK_PIPELINE_STAGE_TRANSFER_BIT, &preCopyBarrier);
1260                 currentLayout = newLayout;
1261             }
1262             const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1263             vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u,
1264                                      &copyRegion);
1265         }
1266     }
1267     else if (isShaderStage(m_params.fromStage))
1268     {
1269         // The image or buffer will be written to from shaders. Images need to be in the right layout.
1270         if (useImageResource)
1271         {
1272             const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
1273             if (newLayout != currentLayout)
1274             {
1275                 const auto preWriteBarrier =
1276                     makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout,
1277                                            newLayout, imageResource->get(), colorSRR);
1278                 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags,
1279                                               &preWriteBarrier);
1280                 currentLayout = newLayout;
1281             }
1282         }
1283     }
1284     else
1285     {
1286         DE_ASSERT(false);
1287     }
1288 
1289     // If the resource is going to be read from shaders and written from a non-shader stage, we'll insert the main barrier before running the pipeline.
1290     if (isShaderStage(m_params.toStage) && !isShaderStage(m_params.fromStage))
1291     {
1292         if (m_params.barrierType == BarrierType::GENERAL)
1293         {
1294             const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1295             cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
1296         }
1297         else if (m_params.barrierType == BarrierType::SPECIFIC)
1298         {
1299             if (useBufferResource)
1300             {
1301                 const auto bufferBarrier =
1302                     makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1303                 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
1304             }
1305             else
1306             {
1307                 const auto newLayout =
1308                     (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
1309                 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1310                                                                  newLayout, imageResource->get(), colorSRR);
1311 
1312                 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
1313                 currentLayout = newLayout;
1314             }
1315         }
1316         // For subpass dependencies, they have already been included in the render pass or loop below.
1317     }
1318 
1319     // Run the pipeline.
1320     if (!multiRenderPass)
1321         beginRenderPass(vkd, cmdBuffer, renderPasses[0].get(), framebuffers[0].get(), scissors.at(0), tcu::UVec4(0u));
1322 
1323     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
1324                               &descriptorSet.get(), 0u, nullptr);
1325 
1326     for (uint32_t iterationIdx = 0u; iterationIdx < iterationCount; ++iterationIdx)
1327     {
1328         if (iterationIdx > 0u && !multiRenderPass && twoPipelines)
1329             vkd.cmdNextSubpass(cmdBuffer, VK_SUBPASS_CONTENTS_INLINE);
1330 
1331         if (multiRenderPass)
1332             beginRenderPass(vkd, cmdBuffer, renderPasses.at(iterationIdx).get(), framebuffers.at(iterationIdx).get(),
1333                             scissors.at(0), tcu::UVec4(0u));
1334 
1335         if (twoPipelines || iterationIdx == 0u)
1336             vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.at(iterationIdx).get());
1337 
1338         PushConstantStruct pcData;
1339         if (selfDeps)
1340         {
1341             // First draw writes, second draw reads.
1342             pcData.writeVal = 1u - iterationIdx;
1343             pcData.readVal  = iterationIdx;
1344         }
1345         else
1346         {
1347             // Otherwise reads and writes freely according to the pipeline shaders.
1348             pcData.writeVal = 1u;
1349             pcData.readVal  = 1u;
1350         }
1351         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1352         vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1353 
1354         if (multiRenderPass)
1355             endRenderPass(vkd, cmdBuffer);
1356 
1357         // If there are self-dependencies or multiple render passes, synchronize resource between draw calls.
1358         if ((multiRenderPass || selfDeps) && iterationIdx == 0u)
1359         {
1360             // In the case of self-dependencies, the barrier type is BarrierType::DEPENDENCY and we'll insert a general barrier because:
1361             //    * VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 forbids using buffer barriers inside render passes.
1362             //    * VUID-vkCmdPipelineBarrier-image-04073 forbids using image memory barriers inside render passes with resources that are not attachments.
1363             if (m_params.barrierType == BarrierType::GENERAL || m_params.barrierType == BarrierType::DEPENDENCY)
1364             {
1365                 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1366                 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
1367             }
1368             else if (m_params.barrierType == BarrierType::SPECIFIC)
1369             {
1370                 if (useBufferResource)
1371                 {
1372                     const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags,
1373                                                                        bufferResource->get(), 0ull, bufferSize);
1374                     cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
1375                 }
1376                 else
1377                 {
1378                     // Note: the image will only be read from shader stages or from the transfer stage.
1379                     DE_ASSERT(useGeneralLayout);
1380                     const auto newLayout    = VK_IMAGE_LAYOUT_GENERAL;
1381                     const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1382                                                                      newLayout, imageResource->get(), colorSRR);
1383 
1384                     cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
1385                     currentLayout = newLayout;
1386                 }
1387             }
1388             else
1389             {
1390                 DE_ASSERT(false);
1391             }
1392 
1393             if (multiRenderPass)
1394             {
1395                 // Sync color attachment writes.
1396                 const auto colorWritesBarrier =
1397                     makeMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1398                 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1399                                          VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &colorWritesBarrier);
1400             }
1401         }
1402     }
1403 
1404     if (!multiRenderPass)
1405         endRenderPass(vkd, cmdBuffer);
1406 
1407     // If the resource was written to from shaders and will be read from a non-shader stage, insert the main barrier after running the pipeline.
1408     if (isShaderStage(m_params.fromStage) && !isShaderStage(m_params.toStage))
1409     {
1410         if (m_params.barrierType == BarrierType::GENERAL)
1411         {
1412             const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1413             cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
1414         }
1415         else if (m_params.barrierType == BarrierType::SPECIFIC)
1416         {
1417             if (useBufferResource)
1418             {
1419                 const auto bufferBarrier =
1420                     makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1421                 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
1422             }
1423             else
1424             {
1425                 // Note: the image will only be read from shader stages or from the transfer stage.
1426                 const auto newLayout =
1427                     (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1428                 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1429                                                                  newLayout, imageResource->get(), colorSRR);
1430 
1431                 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
1432                 currentLayout = newLayout;
1433             }
1434         }
1435         // For subpass dependencies, they have already been included in the render pass and loop.
1436     }
1437 
1438     // Read resource from the destination stage if needed.
1439     if (m_params.toStage == Stage::HOST)
1440     {
1441         // Nothing to do. The test value should be in the resource buffer already, which is host-visible.
1442     }
1443     else if (m_params.toStage == Stage::TRANSFER)
1444     {
1445         // Copy value from resource to host-coherent buffer to be verified later.
1446         if (useBufferResource)
1447         {
1448             const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1449             vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, &copyRegion);
1450         }
1451         else
1452         {
1453             const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1454             vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u,
1455                                      &copyRegion);
1456         }
1457 
1458         transferToHostMemoryBarrier(vkd, cmdBuffer);
1459     }
1460 
1461     // If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
1462     BufferWithMemoryPtr colorVerificationBuffer;
1463     void *colorVerificationDataPtr = nullptr;
1464 
1465     if (valueInColorBuffer(m_params.toStage))
1466     {
1467         const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
1468         colorVerificationBuffer =
1469             BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo,
1470                                                      (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
1471         colorVerificationDataPtr = colorVerificationBuffer->getAllocation().getHostPtr();
1472 
1473         const auto srcAccess = (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1474         const auto dstAccess = VK_ACCESS_TRANSFER_READ_BIT;
1475         const auto colorBarrier =
1476             makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1477                                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
1478         cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1479                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &colorBarrier);
1480 
1481         const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1482         vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1483                                  colorVerificationBuffer->get(), 1u, &copyRegion);
1484 
1485         transferToHostMemoryBarrier(vkd, cmdBuffer);
1486     }
1487 
1488     endCommandBuffer(vkd, cmdBuffer);
1489     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1490 
1491     // Verify output resources as needed.
1492 
1493     if (valueInAuxiliarDestBuffer(m_params.toStage))
1494     {
1495         uint32_t bufferValue;
1496         deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue));
1497 
1498         if (bufferValue != m_params.testValue)
1499         {
1500             std::ostringstream msg;
1501             msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected "
1502                 << m_params.testValue;
1503             TCU_FAIL(msg.str());
1504         }
1505     }
1506 
1507     if (valueInResourceBuffer(m_params.toStage))
1508     {
1509         auto &resourceBufferAlloc   = bufferResource->getAllocation();
1510         void *resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();
1511         uint32_t bufferValue;
1512 
1513         invalidateAlloc(vkd, device, resourceBufferAlloc);
1514         deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue));
1515 
1516         if (bufferValue != m_params.testValue)
1517         {
1518             std::ostringstream msg;
1519             msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected "
1520                 << m_params.testValue;
1521             TCU_FAIL(msg.str());
1522         }
1523     }
1524 
1525     if (valueInColorBuffer(m_params.toStage))
1526     {
1527         uint32_t bufferValue;
1528         deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue));
1529 
1530         if (bufferValue != m_params.testValue)
1531         {
1532             std::ostringstream msg;
1533             msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected "
1534                 << m_params.testValue;
1535             TCU_FAIL(msg.str());
1536         }
1537     }
1538 
1539     return tcu::TestStatus::pass("Pass");
1540 }
1541 
1542 // Specific test to check a barrier that crosses secondary command buffers and goes from compute to task.
1543 class BarrierAcrossSecondaryCase : public vkt::TestCase
1544 {
1545 public:
BarrierAcrossSecondaryCase(tcu::TestContext & testCtx,const std::string & name)1546     BarrierAcrossSecondaryCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
1547     {
1548     }
~BarrierAcrossSecondaryCase(void)1549     virtual ~BarrierAcrossSecondaryCase(void)
1550     {
1551     }
1552 
1553     void checkSupport(Context &context) const override;
1554     TestInstance *createInstance(Context &context) const override;
1555     void initPrograms(vk::SourceCollections &programCollection) const override;
1556 
1557     static constexpr uint32_t kLocalSize     = 128u;
1558     static constexpr uint32_t kNumWorkGroups = 16384u;
1559 };
1560 
1561 class BarrierAcrossSecondaryInstance : public vkt::TestInstance
1562 {
1563 public:
BarrierAcrossSecondaryInstance(Context & context)1564     BarrierAcrossSecondaryInstance(Context &context) : vkt::TestInstance(context)
1565     {
1566     }
~BarrierAcrossSecondaryInstance(void)1567     virtual ~BarrierAcrossSecondaryInstance(void)
1568     {
1569     }
1570 
1571     tcu::TestStatus iterate(void) override;
1572 };
1573 
checkSupport(Context & context) const1574 void BarrierAcrossSecondaryCase::checkSupport(Context &context) const
1575 {
1576     checkTaskMeshShaderSupportEXT(context, true, true);
1577     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1578 }
1579 
createInstance(Context & context) const1580 TestInstance *BarrierAcrossSecondaryCase::createInstance(Context &context) const
1581 {
1582     return new BarrierAcrossSecondaryInstance(context);
1583 }
1584 
initPrograms(vk::SourceCollections & programCollection) const1585 void BarrierAcrossSecondaryCase::initPrograms(vk::SourceCollections &programCollection) const
1586 {
1587     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1588 
1589     const std::string descriptorDecl = "layout (set=0, binding=0, std430) buffer OutputBlock {\n"
1590                                        "    uint values[];\n"
1591                                        "} outBuffer;\n"
1592                                        "layout (set=0, binding=1, std430) buffer VerificationBlock {\n"
1593                                        "    uint values[];\n"
1594                                        "} verificationBuffer;\n";
1595 
1596     // The compute shader will fill the output buffer.
1597     std::ostringstream comp;
1598     comp << "#version 450\n"
1599          << "layout(local_size_x=" << kLocalSize << ") in;\n"
1600          << descriptorDecl << "void main ()\n"
1601          << "{\n"
1602          << "    outBuffer.values[gl_GlobalInvocationID.x] = gl_GlobalInvocationID.x;\n"
1603          << "}\n";
1604     programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());
1605 
1606     // The task shader will read it, verify its contents and write the verification buffer.
1607     std::ostringstream task;
1608     task << "#version 450\n"
1609          << "#extension GL_EXT_mesh_shader : enable\n"
1610          << "layout(local_size_x=" << kLocalSize << ") in;\n"
1611          << descriptorDecl << "void main ()\n"
1612          << "{\n"
1613          << "    const uint verifResult = ((outBuffer.values[gl_GlobalInvocationID.x] == gl_GlobalInvocationID.x) ? 1u "
1614             ": 0u);\n"
1615          << "    verificationBuffer.values[gl_GlobalInvocationID.x] = verifResult;\n"
1616          << "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
1617          << "}\n";
1618     programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1619 
1620     std::ostringstream mesh;
1621     mesh << "#version 450\n"
1622          << "#extension GL_EXT_mesh_shader : enable\n"
1623          << "\n"
1624          << "layout(local_size_x=1) in;\n"
1625          << "layout(triangles) out;\n"
1626          << "layout(max_vertices=3, max_primitives=1) out;\n"
1627          << "\n"
1628          << "void main ()\n"
1629          << "{\n"
1630          << "    SetMeshOutputsEXT(0u, 0u);\n"
1631          << "}\n";
1632     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1633 }
1634 
iterate(void)1635 tcu::TestStatus BarrierAcrossSecondaryInstance::iterate(void)
1636 {
1637     const auto &vkd           = m_context.getDeviceInterface();
1638     const auto device         = m_context.getDevice();
1639     auto &alloc               = m_context.getDefaultAllocator();
1640     const auto queueIndex     = m_context.getUniversalQueueFamilyIndex();
1641     const auto queue          = m_context.getUniversalQueue();
1642     const auto kLocalSize     = BarrierAcrossSecondaryCase::kLocalSize;
1643     const auto kNumWorkGroups = BarrierAcrossSecondaryCase::kNumWorkGroups;
1644     const auto bindingStages  = (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_TASK_BIT_EXT);
1645     const auto extent         = makeExtent3D(1u, 1u, 1u);
1646 
1647     // Output buffer.
1648     const auto outputBufferSize = static_cast<VkDeviceSize>(kLocalSize * kNumWorkGroups * sizeof(uint32_t));
1649     const auto outputBufferInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1650     BufferWithMemory outputBuffer(vkd, device, alloc, outputBufferInfo, MemoryRequirement::HostVisible);
1651     auto &outputBufferAlloc = outputBuffer.getAllocation();
1652     void *outputBufferData  = outputBufferAlloc.getHostPtr();
1653 
1654     // Verification buffer.
1655     const auto verificationBufferSize = outputBufferSize;
1656     const auto verificationBufferInfo = outputBufferInfo;
1657     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1658     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
1659     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
1660 
1661     // Prepare buffer data.
1662     deMemset(outputBufferData, 0, static_cast<size_t>(outputBufferSize));
1663     deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
1664     flushAlloc(vkd, device, outputBufferAlloc);
1665     flushAlloc(vkd, device, verificationBufferAlloc);
1666 
1667     // Descriptor set layout.
1668     DescriptorSetLayoutBuilder setLayoutBuilder;
1669     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
1670     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
1671     const auto setLayout = setLayoutBuilder.build(vkd, device);
1672 
1673     // Pipeline layout.
1674     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
1675 
1676     // Descriptor pool and set.
1677     DescriptorPoolBuilder poolBuilder;
1678     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u);
1679     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1680     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1681 
1682     // Update descriptor set.
1683     DescriptorSetUpdateBuilder updateBuilder;
1684     const auto outputBufferDescInfo = makeDescriptorBufferInfo(outputBuffer.get(), 0ull, outputBufferSize);
1685     const auto verificationBufferDescInfo =
1686         makeDescriptorBufferInfo(verificationBuffer.get(), 0ull, verificationBufferSize);
1687     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1688                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescInfo);
1689     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1690                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verificationBufferDescInfo);
1691     updateBuilder.update(vkd, device);
1692 
1693     // Graphics pipeline auxiliary data.
1694     const auto renderPass  = makeRenderPass(vkd, device);
1695     const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, extent.width, extent.height);
1696     const std::vector<VkViewport> viewports(1u, makeViewport(extent));
1697     const std::vector<VkRect2D> scissors(1u, makeRect2D(extent));
1698 
1699     // Create pipelines.
1700     const auto &binaries  = m_context.getBinaryCollection();
1701     const auto compModule = createShaderModule(vkd, device, binaries.get("comp"));
1702     const auto taskModule = createShaderModule(vkd, device, binaries.get("task"));
1703     const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
1704 
1705     const auto computePipeline = makeComputePipeline(vkd, device, pipelineLayout.get(), compModule.get());
1706     const auto meshPipeline    = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskModule.get(),
1707                                                       meshModule.get(), DE_NULL, renderPass.get(), viewports, scissors);
1708 
1709     // Command pool and command buffers.
1710     const auto cmdPool          = makeCommandPool(vkd, device, queueIndex);
1711     const auto primaryCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1712     const auto compCmdBuffer    = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1713     const auto meshCmdBuffer    = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1714 
1715     // Use compute pipeline and record barrier to task shader.
1716     {
1717         const auto cmdBuffer        = compCmdBuffer.get();
1718         const auto comp2TaskBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
1719 
1720         beginSecondaryCommandBuffer(vkd, cmdBuffer);
1721         vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1u,
1722                                   &descriptorSet.get(), 0u, nullptr);
1723         vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
1724         vkd.cmdDispatch(cmdBuffer, kNumWorkGroups, 1u, 1u);
1725         cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1726                                  VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, &comp2TaskBarrier);
1727         endCommandBuffer(vkd, cmdBuffer);
1728     }
1729 
1730     // Use mesh pipeline and record barrier to host.
1731     {
1732         const auto cmdBuffer = meshCmdBuffer.get();
1733 
1734         beginSecondaryCommandBuffer(vkd, cmdBuffer, renderPass.get(), framebuffer.get());
1735         vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
1736                                   &descriptorSet.get(), 0u, nullptr);
1737         vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
1738         vkd.cmdDrawMeshTasksEXT(cmdBuffer, kNumWorkGroups, 1u, 1u);
1739         endCommandBuffer(vkd, cmdBuffer);
1740     }
1741 
1742     // Use both secondary command buffers.
1743     {
1744         const auto cmdBuffer        = primaryCmdBuffer.get();
1745         const auto task2HostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1746 
1747         beginCommandBuffer(vkd, cmdBuffer);
1748         vkd.cmdExecuteCommands(cmdBuffer, 1u, &compCmdBuffer.get());
1749         beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u),
1750                         VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
1751         vkd.cmdExecuteCommands(cmdBuffer, 1u, &meshCmdBuffer.get());
1752         endRenderPass(vkd, cmdBuffer);
1753         cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_HOST_BIT,
1754                                  &task2HostBarrier);
1755         endCommandBuffer(vkd, cmdBuffer);
1756         submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1757     }
1758 
1759     // Verify buffer contents.
1760     invalidateAlloc(vkd, device, verificationBufferAlloc);
1761     const std::vector<uint32_t> expectedResult(kNumWorkGroups * kLocalSize, 1u);
1762 
1763     if (deMemCmp(expectedResult.data(), verificationBufferData, de::dataSize(expectedResult)) != 0)
1764         TCU_FAIL("Unexpected values found in verification buffer");
1765 
1766     return tcu::TestStatus::pass("Pass");
1767 }
1768 
1769 } // namespace
1770 
createMeshShaderSyncTestsEXT(tcu::TestContext & testCtx)1771 tcu::TestCaseGroup *createMeshShaderSyncTestsEXT(tcu::TestContext &testCtx)
1772 {
1773     const struct
1774     {
1775         Stage fromStage;
1776         Stage toStage;
1777     } stageCombinations[] = {
1778         // Combinations where the source and destination stages involve mesh shaders.
1779         // Note: this could be tested procedurally.
1780         {Stage::HOST, Stage::TASK},
1781         {Stage::HOST, Stage::MESH},
1782         {Stage::TRANSFER, Stage::TASK},
1783         {Stage::TRANSFER, Stage::MESH},
1784         {Stage::TASK, Stage::MESH},
1785         {Stage::TASK, Stage::FRAG},
1786         {Stage::TASK, Stage::TRANSFER},
1787         {Stage::TASK, Stage::HOST},
1788         {Stage::MESH, Stage::FRAG},
1789         {Stage::MESH, Stage::TRANSFER},
1790         {Stage::MESH, Stage::HOST},
1791 
1792         // These require two pipelines.
1793         {Stage::MESH, Stage::TASK},
1794         {Stage::FRAG, Stage::TASK},
1795         {Stage::FRAG, Stage::MESH},
1796     };
1797 
1798     const struct
1799     {
1800         ResourceType resourceType;
1801         const char *name;
1802     } resourceTypes[] = {
1803         {ResourceType::UNIFORM_BUFFER, "uniform_buffer"},
1804         {ResourceType::STORAGE_BUFFER, "storage_buffer"},
1805         {ResourceType::STORAGE_IMAGE, "storage_image"},
1806         {ResourceType::SAMPLED_IMAGE, "sampled_image"},
1807     };
1808 
1809     const struct
1810     {
1811         BarrierType barrierType;
1812         const char *name;
1813     } barrierTypes[] = {
1814         {BarrierType::GENERAL, "memory_barrier"},
1815         {BarrierType::SPECIFIC, "specific_barrier"},
1816         {BarrierType::DEPENDENCY, "subpass_dependency"},
1817     };
1818 
1819     const struct
1820     {
1821         WriteAccess writeAccess;
1822         const char *name;
1823     } writeAccesses[] = {
1824         {WriteAccess::HOST_WRITE, "host_write"},
1825         {WriteAccess::TRANSFER_WRITE, "transfer_write"},
1826         {WriteAccess::SHADER_WRITE, "shader_write"},
1827     };
1828 
1829     const struct
1830     {
1831         ReadAccess readAccess;
1832         const char *name;
1833     } readAccesses[] = {
1834         {ReadAccess::HOST_READ, "host_read"},
1835         {ReadAccess::TRANSFER_READ, "transfer_read"},
1836         {ReadAccess::SHADER_READ, "shader_read"},
1837         {ReadAccess::UNIFORM_READ, "uniform_read"},
1838     };
1839 
1840     uint32_t testValue = 1628510124u;
1841 
1842     GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "synchronization"));
1843 
1844     for (const auto &stageCombination : stageCombinations)
1845     {
1846         const std::string combinationName =
1847             de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
1848         GroupPtr combinationGroup(new tcu::TestCaseGroup(testCtx, combinationName.c_str()));
1849 
1850         for (const auto &resourceCase : resourceTypes)
1851         {
1852             if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
1853                 continue;
1854 
1855             if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
1856                 continue;
1857 
1858             GroupPtr resourceGroup(new tcu::TestCaseGroup(testCtx, resourceCase.name));
1859 
1860             for (const auto &barrierCase : barrierTypes)
1861             {
1862                 const auto shaderToShader = fromShaderToShader(stageCombination.fromStage, stageCombination.toStage);
1863                 const auto barrierIsDependency = (barrierCase.barrierType == BarrierType::DEPENDENCY);
1864 
1865                 // Subpass dependencies can only be used in shader to shader situations.
1866                 if (barrierIsDependency && !shaderToShader)
1867                     continue;
1868 
1869                 GroupPtr barrierGroup(new tcu::TestCaseGroup(testCtx, barrierCase.name));
1870 
1871                 for (const auto &writeCase : writeAccesses)
1872                     for (const auto &readCase : readAccesses)
1873                     {
1874                         if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
1875                             continue;
1876                         if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
1877                             continue;
1878                         if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
1879                             continue;
1880                         if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
1881                             continue;
1882 
1883                         const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;
1884 
1885                         const TestParams testParams = {
1886                             stageCombination.fromStage, // Stage fromStage;
1887                             stageCombination.toStage,   // Stage toStage;
1888                             resourceCase.resourceType,  // ResourceType resourceType;
1889                             barrierCase.barrierType,    // BarrierType barrierType;
1890                             writeCase.writeAccess,      // WriteAccess writeAccess;
1891                             readCase.readAccess,        // ReadAccess readAccess;
1892                             testValue++,                // uint32_t testValue;
1893                         };
1894 
1895                         barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, testParams));
1896                     }
1897 
1898                 resourceGroup->addChild(barrierGroup.release());
1899             }
1900 
1901             combinationGroup->addChild(resourceGroup.release());
1902         }
1903 
1904         mainGroup->addChild(combinationGroup.release());
1905     }
1906 
1907     {
1908         // Additional synchronization tests
1909         GroupPtr otherGroup(new tcu::TestCaseGroup(testCtx, "other"));
1910 
1911         // Check synchronizing compute to task across secondary command buffer boundaries
1912         otherGroup->addChild(new BarrierAcrossSecondaryCase(testCtx, "barrier_across_secondary"));
1913 
1914         mainGroup->addChild(otherGroup.release());
1915     }
1916 
1917     return mainGroup.release();
1918 }
1919 
1920 } // namespace MeshShader
1921 } // namespace vkt
1922