1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Mesh Shader Synchronization Tests for VK_EXT_mesh_shader
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMeshShaderSyncTestsEXT.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkTypeUtil.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkImageUtil.hpp"
38
39 #include "deUniquePtr.hpp"
40
41 #include <iostream>
42 #include <sstream>
43 #include <vector>
44 #include <set>
45
46 namespace vkt
47 {
48 namespace MeshShader
49 {
50
51 namespace
52 {
53
54 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
55
56 using namespace vk;
57
58 // Stages that will be used in these tests. Shader stages sorted in pipeline order.
59 enum class Stage
60 {
61 HOST = 0,
62 TRANSFER,
63 TASK,
64 MESH,
65 FRAG,
66 };
67
operator <<(std::ostream & stream,Stage stage)68 std::ostream &operator<<(std::ostream &stream, Stage stage)
69 {
70 switch (stage)
71 {
72 case Stage::HOST:
73 stream << "host";
74 break;
75 case Stage::TRANSFER:
76 stream << "transfer";
77 break;
78 case Stage::TASK:
79 stream << "task";
80 break;
81 case Stage::MESH:
82 stream << "mesh";
83 break;
84 case Stage::FRAG:
85 stream << "frag";
86 break;
87 default:
88 DE_ASSERT(false);
89 break;
90 }
91
92 return stream;
93 }
94
isShaderStage(Stage stage)95 bool isShaderStage(Stage stage)
96 {
97 return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG);
98 }
99
stageToFlags(Stage stage)100 VkPipelineStageFlags stageToFlags(Stage stage)
101 {
102 switch (stage)
103 {
104 case Stage::HOST:
105 return VK_PIPELINE_STAGE_HOST_BIT;
106 case Stage::TRANSFER:
107 return VK_PIPELINE_STAGE_TRANSFER_BIT;
108 case Stage::TASK:
109 return VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT;
110 case Stage::MESH:
111 return VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT;
112 case Stage::FRAG:
113 return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
114 default:
115 DE_ASSERT(false);
116 break;
117 }
118
119 // Unreachable.
120 DE_ASSERT(false);
121 return 0u;
122 }
123
getImageFormat()124 VkFormat getImageFormat()
125 {
126 return VK_FORMAT_R32_UINT;
127 }
128
getImageExtent()129 VkExtent3D getImageExtent()
130 {
131 return makeExtent3D(1u, 1u, 1u);
132 }
133
134 // Types of resources we will use.
135 enum class ResourceType
136 {
137 UNIFORM_BUFFER = 0,
138 STORAGE_BUFFER,
139 STORAGE_IMAGE,
140 SAMPLED_IMAGE,
141 };
142
resourceTypeToDescriptor(ResourceType resType)143 VkDescriptorType resourceTypeToDescriptor(ResourceType resType)
144 {
145 switch (resType)
146 {
147 case ResourceType::UNIFORM_BUFFER:
148 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
149 case ResourceType::STORAGE_BUFFER:
150 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
151 case ResourceType::STORAGE_IMAGE:
152 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
153 case ResourceType::SAMPLED_IMAGE:
154 return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
155 default:
156 DE_ASSERT(false);
157 break;
158 }
159
160 // Unreachable.
161 DE_ASSERT(false);
162 return VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
163 }
164
165 // Will the test use a specific barrier or a general memory barrier?
166 enum class BarrierType
167 {
168 GENERAL = 0,
169 SPECIFIC,
170 DEPENDENCY,
171 };
172
173 // Types of writes we will use.
174 enum class WriteAccess
175 {
176 HOST_WRITE = 0,
177 TRANSFER_WRITE,
178 SHADER_WRITE,
179 };
180
writeAccessToFlags(WriteAccess access)181 VkAccessFlags writeAccessToFlags(WriteAccess access)
182 {
183 switch (access)
184 {
185 case WriteAccess::HOST_WRITE:
186 return VK_ACCESS_HOST_WRITE_BIT;
187 case WriteAccess::TRANSFER_WRITE:
188 return VK_ACCESS_TRANSFER_WRITE_BIT;
189 case WriteAccess::SHADER_WRITE:
190 return VK_ACCESS_SHADER_WRITE_BIT;
191 default:
192 DE_ASSERT(false);
193 break;
194 }
195
196 // Unreachable.
197 DE_ASSERT(false);
198 return 0u;
199 }
200
201 // Types of reads we will use.
202 enum class ReadAccess
203 {
204 HOST_READ = 0,
205 TRANSFER_READ,
206 SHADER_READ,
207 UNIFORM_READ,
208 };
209
readAccessToFlags(ReadAccess access)210 VkAccessFlags readAccessToFlags(ReadAccess access)
211 {
212 switch (access)
213 {
214 case ReadAccess::HOST_READ:
215 return VK_ACCESS_HOST_READ_BIT;
216 case ReadAccess::TRANSFER_READ:
217 return VK_ACCESS_TRANSFER_READ_BIT;
218 case ReadAccess::SHADER_READ:
219 return VK_ACCESS_SHADER_READ_BIT;
220 case ReadAccess::UNIFORM_READ:
221 return VK_ACCESS_UNIFORM_READ_BIT;
222 default:
223 DE_ASSERT(false);
224 break;
225 }
226
227 // Unreachable.
228 DE_ASSERT(false);
229 return 0u;
230 }
231
232 // Auxiliary functions to verify certain combinations are possible.
233
234 // Check if the writing stage can use the specified write access.
canWriteFromStageAsAccess(Stage writeStage,WriteAccess access)235 bool canWriteFromStageAsAccess(Stage writeStage, WriteAccess access)
236 {
237 switch (writeStage)
238 {
239 case Stage::HOST:
240 return (access == WriteAccess::HOST_WRITE);
241 case Stage::TRANSFER:
242 return (access == WriteAccess::TRANSFER_WRITE);
243 case Stage::TASK: // fallthrough
244 case Stage::MESH: // fallthrough
245 case Stage::FRAG:
246 return (access == WriteAccess::SHADER_WRITE);
247 default:
248 DE_ASSERT(false);
249 break;
250 }
251
252 return false;
253 }
254
255 // Check if the reading stage can use the specified read access.
canReadFromStageAsAccess(Stage readStage,ReadAccess access)256 bool canReadFromStageAsAccess(Stage readStage, ReadAccess access)
257 {
258 switch (readStage)
259 {
260 case Stage::HOST:
261 return (access == ReadAccess::HOST_READ);
262 case Stage::TRANSFER:
263 return (access == ReadAccess::TRANSFER_READ);
264 case Stage::TASK: // fallthrough
265 case Stage::MESH: // fallthrough
266 case Stage::FRAG:
267 return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ);
268 default:
269 DE_ASSERT(false);
270 break;
271 }
272
273 return false;
274 }
275
276 // Check if reading the given resource type is possible with the given type of read access.
canReadResourceAsAccess(ResourceType resType,ReadAccess access)277 bool canReadResourceAsAccess(ResourceType resType, ReadAccess access)
278 {
279 if (access == ReadAccess::UNIFORM_READ)
280 return (resType == ResourceType::UNIFORM_BUFFER);
281 return true;
282 }
283
284 // Check if writing to the given resource type is possible with the given type of write access.
canWriteResourceAsAccess(ResourceType resType,WriteAccess access)285 bool canWriteResourceAsAccess(ResourceType resType, WriteAccess access)
286 {
287 if (resType == ResourceType::UNIFORM_BUFFER)
288 return (access != WriteAccess::SHADER_WRITE);
289 return true;
290 }
291
292 // Check if the given stage can write to the given resource type.
canWriteTo(Stage stage,ResourceType resType)293 bool canWriteTo(Stage stage, ResourceType resType)
294 {
295 switch (stage)
296 {
297 case Stage::HOST:
298 return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
299 case Stage::TRANSFER:
300 return true;
301 case Stage::TASK: // fallthrough
302 case Stage::MESH: // fallthrough
303 case Stage::FRAG:
304 return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE);
305 default:
306 DE_ASSERT(false);
307 break;
308 }
309
310 return false;
311 }
312
313 // Check if the given stage can read from the given resource type.
canReadFrom(Stage stage,ResourceType resType)314 bool canReadFrom(Stage stage, ResourceType resType)
315 {
316 switch (stage)
317 {
318 case Stage::HOST:
319 return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
320 case Stage::TRANSFER: // fallthrough
321 case Stage::TASK: // fallthrough
322 case Stage::MESH: // fallthrough
323 case Stage::FRAG:
324 return true;
325 default:
326 DE_ASSERT(false);
327 break;
328 }
329
330 return false;
331 }
332
333 // Will we need to store the test value in an auxiliar buffer to be read?
needsAuxiliarSourceBuffer(Stage fromStage,Stage toStage)334 bool needsAuxiliarSourceBuffer(Stage fromStage, Stage toStage)
335 {
336 DE_UNREF(toStage);
337 return (fromStage == Stage::TRANSFER);
338 }
339
340 // Will we need to store the read operation result into an auxiliar buffer to be checked?
needsAuxiliarDestBuffer(Stage fromStage,Stage toStage)341 bool needsAuxiliarDestBuffer(Stage fromStage, Stage toStage)
342 {
343 DE_UNREF(fromStage);
344 return (toStage == Stage::TRANSFER);
345 }
346
347 // Needs any auxiliar buffer for any case?
needsAuxiliarBuffer(Stage fromStage,Stage toStage)348 bool needsAuxiliarBuffer(Stage fromStage, Stage toStage)
349 {
350 return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage));
351 }
352
353 // Will the final value be stored in the auxiliar destination buffer?
valueInAuxiliarDestBuffer(Stage toStage)354 bool valueInAuxiliarDestBuffer(Stage toStage)
355 {
356 return (toStage == Stage::TRANSFER);
357 }
358
359 // Will the final value be stored in the resource buffer itself?
valueInResourceBuffer(Stage toStage)360 bool valueInResourceBuffer(Stage toStage)
361 {
362 return (toStage == Stage::HOST);
363 }
364
365 // Will the final value be stored in the color buffer?
valueInColorBuffer(Stage toStage)366 bool valueInColorBuffer(Stage toStage)
367 {
368 return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage));
369 }
370
371 // Image usage flags for the image resource.
resourceImageUsageFlags(ResourceType resourceType)372 VkImageUsageFlags resourceImageUsageFlags(ResourceType resourceType)
373 {
374 VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
375
376 switch (resourceType)
377 {
378 case ResourceType::STORAGE_IMAGE:
379 flags |= VK_IMAGE_USAGE_STORAGE_BIT;
380 break;
381 case ResourceType::SAMPLED_IMAGE:
382 flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
383 break;
384 default:
385 DE_ASSERT(false);
386 break;
387 }
388
389 return flags;
390 }
391
392 // Buffer usage flags for the buffer resource.
resourceBufferUsageFlags(ResourceType resourceType)393 VkBufferUsageFlags resourceBufferUsageFlags(ResourceType resourceType)
394 {
395 VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
396
397 switch (resourceType)
398 {
399 case ResourceType::UNIFORM_BUFFER:
400 flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
401 break;
402 case ResourceType::STORAGE_BUFFER:
403 flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
404 break;
405 default:
406 DE_ASSERT(false);
407 break;
408 }
409
410 return flags;
411 }
412
413 // Returns true if both the write and read stages are shader stages.
fromShaderToShader(Stage fromStage,Stage toStage)414 bool fromShaderToShader(Stage fromStage, Stage toStage)
415 {
416 return (isShaderStage(fromStage) && isShaderStage(toStage));
417 }
418
419 // Supposing we'll use two subpasses, decide the stages of a subpass based on the mandatory stages and the one we're interested in.
subpassStages(Stage wantedStage,bool lastSubpass)420 std::vector<Stage> subpassStages(Stage wantedStage, bool lastSubpass)
421 {
422 std::set<Stage> stages;
423 stages.insert(wantedStage);
424 stages.insert(Stage::MESH); // This one is mandatory.
425 if (lastSubpass)
426 stages.insert(Stage::FRAG); // In the last subpass we always need a fragment shader (passthrough).
427 return std::vector<Stage>(begin(stages), end(stages));
428 }
429
430 // Is the task shader in the list?
hasTask(const std::vector<Stage> & stages)431 bool hasTask(const std::vector<Stage> &stages)
432 {
433 return de::contains(begin(stages), end(stages), Stage::TASK);
434 }
435
436 // Is the frag shader in the list?
hasFrag(const std::vector<Stage> & stages)437 bool hasFrag(const std::vector<Stage> &stages)
438 {
439 return de::contains(begin(stages), end(stages), Stage::FRAG);
440 }
441
442 struct TestParams
443 {
444 Stage fromStage;
445 Stage toStage;
446 ResourceType resourceType;
447 BarrierType barrierType;
448 WriteAccess writeAccess;
449 ReadAccess readAccess;
450 uint32_t testValue;
451
452 protected:
readsOrWritesInvkt::MeshShader::__anon0d35f4610111::TestParams453 bool readsOrWritesIn(Stage stage) const
454 {
455 DE_ASSERT(fromStage != toStage);
456 return (fromStage == stage || toStage == stage);
457 }
458
459 public:
needsTaskvkt::MeshShader::__anon0d35f4610111::TestParams460 bool needsTask() const
461 {
462 return readsOrWritesIn(Stage::TASK);
463 }
464
readsOrWritesInMeshvkt::MeshShader::__anon0d35f4610111::TestParams465 bool readsOrWritesInMesh() const
466 {
467 return readsOrWritesIn(Stage::MESH);
468 }
469
getResourceDeclvkt::MeshShader::__anon0d35f4610111::TestParams470 std::string getResourceDecl() const
471 {
472 const auto imgFormat = ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : "");
473 const auto storagePrefix = ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly ");
474 std::ostringstream decl;
475
476 decl << "layout (set=0, binding=0" << imgFormat << ") ";
477 switch (resourceType)
478 {
479 case ResourceType::UNIFORM_BUFFER:
480 decl << "uniform UniformBuffer { uint value; } ub;";
481 break;
482 case ResourceType::STORAGE_BUFFER:
483 decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;";
484 break;
485 case ResourceType::STORAGE_IMAGE:
486 decl << storagePrefix << "uniform uimage2D si;";
487 break;
488 case ResourceType::SAMPLED_IMAGE:
489 decl << "uniform usampler2D sampled;";
490 break;
491 default:
492 DE_ASSERT(false);
493 break;
494 }
495
496 decl << "\n";
497 return decl.str();
498 }
499
500 struct PushConstantStruct
501 {
502 uint32_t writeVal;
503 uint32_t readVal;
504 };
505
506 // Get declaration for the "pc" push constant block. Must match the structure above.
getPushConstantDeclvkt::MeshShader::__anon0d35f4610111::TestParams507 std::string getPushConstantDecl() const
508 {
509 std::ostringstream pc;
510 pc << "layout (push_constant, std430) uniform PushConstantBlock {\n"
511 << " uint writeVal;\n"
512 << " uint readVal;\n"
513 << "} pc;\n";
514 return pc.str();
515 }
516
getReadStatementvkt::MeshShader::__anon0d35f4610111::TestParams517 std::string getReadStatement(const std::string &outName) const
518 {
519 std::ostringstream statement;
520 statement << " if (pc.readVal > 0u) { " << outName << " = ";
521
522 switch (resourceType)
523 {
524 case ResourceType::UNIFORM_BUFFER:
525 statement << "ub.value";
526 break;
527 case ResourceType::STORAGE_BUFFER:
528 statement << "sb.value";
529 break;
530 case ResourceType::STORAGE_IMAGE:
531 statement << "imageLoad(si, ivec2(0, 0)).x";
532 break;
533 case ResourceType::SAMPLED_IMAGE:
534 statement << "texture(sampled, vec2(0.5, 0.5)).x";
535 break;
536 default:
537 DE_ASSERT(false);
538 break;
539 }
540
541 statement << "; }\n";
542 return statement.str();
543 }
544
getWriteStatementvkt::MeshShader::__anon0d35f4610111::TestParams545 std::string getWriteStatement(const std::string &valueName) const
546 {
547 std::ostringstream statement;
548 statement << " if (pc.writeVal > 0u) { ";
549
550 switch (resourceType)
551 {
552 case ResourceType::STORAGE_BUFFER:
553 statement << "sb.value = " << valueName;
554 break;
555 case ResourceType::STORAGE_IMAGE:
556 statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))";
557 break;
558 case ResourceType::UNIFORM_BUFFER: // fallthrough
559 case ResourceType::SAMPLED_IMAGE: // fallthrough
560 default:
561 DE_ASSERT(false);
562 break;
563 }
564
565 statement << "; }\n";
566 return statement.str();
567 }
568
getResourceShaderStagesvkt::MeshShader::__anon0d35f4610111::TestParams569 VkShaderStageFlags getResourceShaderStages() const
570 {
571 VkShaderStageFlags flags = 0u;
572
573 if (fromStage == Stage::TASK || toStage == Stage::TASK)
574 flags |= VK_SHADER_STAGE_TASK_BIT_EXT;
575 if (fromStage == Stage::MESH || toStage == Stage::MESH)
576 flags |= VK_SHADER_STAGE_MESH_BIT_EXT;
577 if (fromStage == Stage::FRAG || toStage == Stage::FRAG)
578 flags |= VK_SHADER_STAGE_FRAGMENT_BIT;
579
580 // We assume at least something must be done either on the task or mesh shaders for the tests to be interesting.
581 DE_ASSERT((flags & (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT)) != 0u);
582 return flags;
583 }
584
585 // We'll prefer to keep the image in the general layout if it will be written to from a shader stage or if the barrier is going to be a generic memory barrier.
preferGeneralLayoutvkt::MeshShader::__anon0d35f4610111::TestParams586 bool preferGeneralLayout() const
587 {
588 return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) ||
589 (resourceType == ResourceType::STORAGE_IMAGE));
590 }
591
592 // We need two pipelines if both the writing and reading stage are shaders, and either:
593 // - The writing stage comes after the reading stage in the pipeline.
594 // - The barrier to use is not a dependency.
needsTwoPipelinesvkt::MeshShader::__anon0d35f4610111::TestParams595 bool needsTwoPipelines() const
596 {
597 return (fromShaderToShader(fromStage, toStage) &&
598 (static_cast<int>(fromStage) >= static_cast<int>(toStage) || barrierType != BarrierType::DEPENDENCY));
599 }
600
601 // We need to use generic barriers when using subpass self-dependencies (single subpass and pipeline).
602 // Note: barrierType == BarrierType::DEPENDENCY is technically redundant with !needsTwoPipelines().
subpassSelfDependencyvkt::MeshShader::__anon0d35f4610111::TestParams603 bool subpassSelfDependency() const
604 {
605 return (fromShaderToShader(fromStage, toStage) && barrierType == BarrierType::DEPENDENCY &&
606 !needsTwoPipelines());
607 }
608 };
609
610 class MeshShaderSyncCase : public vkt::TestCase
611 {
612 public:
MeshShaderSyncCase(tcu::TestContext & testCtx,const std::string & name,const TestParams & params)613 MeshShaderSyncCase(tcu::TestContext &testCtx, const std::string &name, const TestParams ¶ms)
614 : vkt::TestCase(testCtx, name)
615 , m_params(params)
616 {
617 }
618
~MeshShaderSyncCase(void)619 virtual ~MeshShaderSyncCase(void)
620 {
621 }
622
623 void checkSupport(Context &context) const override;
624 void initPrograms(vk::SourceCollections &programCollection) const override;
625 TestInstance *createInstance(Context &context) const override;
626
627 protected:
628 TestParams m_params;
629 };
630
631 class MeshShaderSyncInstance : public vkt::TestInstance
632 {
633 public:
MeshShaderSyncInstance(Context & context,const TestParams & params)634 MeshShaderSyncInstance(Context &context, const TestParams ¶ms) : vkt::TestInstance(context), m_params(params)
635 {
636 }
~MeshShaderSyncInstance(void)637 virtual ~MeshShaderSyncInstance(void)
638 {
639 }
640
641 tcu::TestStatus iterate(void) override;
642
643 protected:
644 TestParams m_params;
645 };
646
checkSupport(Context & context) const647 void MeshShaderSyncCase::checkSupport(Context &context) const
648 {
649 checkTaskMeshShaderSupportEXT(context, m_params.needsTask(), true);
650
651 if (m_params.writeAccess == WriteAccess::SHADER_WRITE)
652 {
653 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
654 }
655 }
656
initPrograms(vk::SourceCollections & programCollection) const657 void MeshShaderSyncCase::initPrograms(vk::SourceCollections &programCollection) const
658 {
659 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
660 const bool needsTaskShader = m_params.needsTask();
661 const auto valueStr = de::toString(m_params.testValue);
662 const auto resourceDecl = m_params.getResourceDecl();
663 const auto pcDecl = m_params.getPushConstantDecl();
664 const std::string tdDecl = "struct TaskData { uint value; }; taskPayloadSharedEXT TaskData td;\n";
665
666 if (needsTaskShader)
667 {
668 std::ostringstream task;
669 task << "#version 450\n"
670 << "#extension GL_EXT_mesh_shader : enable\n"
671 << "\n"
672 << "layout(local_size_x=1) in;\n"
673 << "\n"
674 << tdDecl << "\n"
675 << resourceDecl << pcDecl << "\n"
676 << "void main ()\n"
677 << "{\n"
678 << " td.value = 0u;\n"
679 << ((m_params.fromStage == Stage::TASK) ? m_params.getWriteStatement(valueStr) : "")
680 << ((m_params.toStage == Stage::TASK) ? m_params.getReadStatement("td.value") : "")
681 << " EmitMeshTasksEXT(1u, 1u, 1u);\n"
682 << "}\n";
683 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
684 }
685
686 {
687 // In the mesh-to-task case, we need non-passthrough mesh and task shaders but the mesh shader doesn't have a previous task shader.
688 // In the task-to-mesh case, the second pipeline will have the main mesh shader but no previous task shader either.
689 const bool prevTaskInMainMesh =
690 (needsTaskShader && !(m_params.fromStage == Stage::MESH && m_params.toStage == Stage::TASK) &&
691 !(m_params.fromStage == Stage::TASK && m_params.toStage == Stage::MESH));
692 const bool rwInMeshStage = m_params.readsOrWritesInMesh();
693
694 std::ostringstream mesh;
695 mesh << "#version 450\n"
696 << "#extension GL_EXT_mesh_shader : enable\n"
697 << "\n"
698 << "layout(local_size_x=1) in;\n"
699 << "layout(triangles) out;\n"
700 << "layout(max_vertices=3, max_primitives=1) out;\n"
701 << "\n"
702 << (prevTaskInMainMesh ? tdDecl : "") << "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n"
703 << "\n"
704 << (rwInMeshStage ? resourceDecl : "") << (rwInMeshStage ? pcDecl : "") << "\n"
705 << "void main ()\n"
706 << "{\n"
707 << " SetMeshOutputsEXT(3u, 1u);\n"
708 << (prevTaskInMainMesh ? " primitiveValue[0] = td.value;\n" : "")
709 << ((m_params.fromStage == Stage::MESH) ? m_params.getWriteStatement(valueStr) : "")
710 << ((m_params.toStage == Stage::MESH) ? m_params.getReadStatement("primitiveValue[0]") : "") << "\n"
711 << " gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
712 << " gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0, 3.0, 0.0, 1.0);\n"
713 << " gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
714 << " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
715 << "}\n";
716 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
717 }
718
719 {
720 const bool readFromFrag = (m_params.toStage == Stage::FRAG);
721 const bool writeFromFrag = (m_params.fromStage == Stage::FRAG);
722 const bool rwInFragStage = (readFromFrag || writeFromFrag);
723 std::ostringstream frag;
724
725 frag << "#version 450\n"
726 << "#extension GL_EXT_mesh_shader : enable\n"
727 << "\n"
728 << "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n"
729 << "layout (location=0) out uvec4 outColor;\n"
730 << "\n"
731 << (rwInFragStage ? resourceDecl : "") << (rwInFragStage ? pcDecl : "") << "\n"
732 << "void main ()\n"
733 << "{\n"
734 << " outColor = uvec4(primitiveValue, 0, 0, 0);\n"
735 << " uint readVal = 0u;\n"
736 << (readFromFrag ? m_params.getReadStatement("readVal") : "")
737 << (readFromFrag ? " outColor = uvec4(readVal, 0, 0, 0);\n" : "")
738 << (writeFromFrag ? m_params.getWriteStatement(valueStr) : "") << "}\n";
739 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
740 }
741
742 // Passthrough shaders.
743 {
744 const std::string task = "#version 450\n"
745 "#extension GL_EXT_mesh_shader : enable\n"
746 "\n"
747 "layout(local_size_x=1) in;\n"
748 "\n" +
749 tdDecl +
750 "\n"
751 "void main ()\n"
752 "{\n"
753 " td.value = 0u;\n"
754 " EmitMeshTasksEXT(1u, 1u, 1u);\n"
755 "}\n";
756 programCollection.glslSources.add("taskPassthrough") << glu::TaskSource(task) << buildOptions;
757
758 const std::string frag = "#version 450\n"
759 "#extension GL_EXT_mesh_shader : enable\n"
760 "\n"
761 "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n"
762 "layout (location=0) out uvec4 outColor;\n"
763 "\n"
764 "void main ()\n"
765 "{\n"
766 " outColor = uvec4(primitiveValue, 0, 0, 0);\n"
767 "}\n";
768 programCollection.glslSources.add("fragPassthrough") << glu::FragmentSource(frag) << buildOptions;
769
770 for (int i = 0; i < 2; ++i)
771 {
772 const bool prevTask = (i > 0);
773 const std::string nameSuffix = (prevTask ? "WithTask" : "");
774 const std::string mesh = "#version 450\n"
775 "#extension GL_EXT_mesh_shader : enable\n"
776 "\n"
777 "layout(local_size_x=1) in;\n"
778 "layout(triangles) out;\n"
779 "layout(max_vertices=3, max_primitives=1) out;\n"
780 "\n" +
781 (prevTask ? tdDecl : "") +
782 "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n"
783 "\n"
784 "void main ()\n"
785 "{\n"
786 " SetMeshOutputsEXT(3u, 1u);\n"
787 " " +
788 (prevTask ? "primitiveValue[0] = td.value;" : "primitiveValue[0] = 0u;") +
789 "\n"
790 "\n"
791 " gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
792 " gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0, 3.0, 0.0, 1.0);\n"
793 " gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
794 " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
795 "}\n";
796 programCollection.glslSources.add("meshPassthrough" + nameSuffix) << glu::MeshSource(mesh) << buildOptions;
797 }
798 }
799 }
800
createInstance(Context & context) const801 TestInstance *MeshShaderSyncCase::createInstance(Context &context) const
802 {
803 return new MeshShaderSyncInstance(context, m_params);
804 }
805
806 // General description behind these tests.
807 //
808 // From To
809 // ==============================
810 // HOST TASK Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer.
811 // HOST MESH Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer.
812 // TRANSFER TASK Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer.
813 // TRANSFER MESH Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer.
814 // TASK MESH Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer.
815 // TASK FRAG Same write procedure and restrictions. Read from frag into outColor. Verify color buffer.
816 // TASK TRANSFER Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer.
817 // TASK HOST Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly.
818 // MESH FRAG Same as task to frag but the write instructions need to be in the mesh shader.
819 // MESH TRANSFER Same as task to transfer but the write instructions need to be in the mesh shader.
820 // MESH HOST Same as task to host but the write instructions need to be in the mesh shader.
821 //
822 // The following cases require two pipelines
823 // =========================================
824 // MESH TASK Write value to buffer or image from mesh shader. Only valid for storage buffers and images. Read from task into td.value. Verify color buffer.
825 // Sequence: mesh, task, mesh*, frag*.
826 // FRAG TASK Same as mesh to task, but writing from the first fragment shader.
827 // Sequence: mesh*, frag, task, mesh*, frag*.
828 // FRAG MESH Similar to frag to task, but reading from mesh into primitiveValue[0]. Verify color buffer after second fragment shader.
829 // Sequence: mesh*, frag, mesh, frag*.
830 //
831
832 // Create one or two render passes with the right dependencies depending on the test parameters.
createCustomRenderPasses(const DeviceInterface & vkd,VkDevice device,VkFormat colorFormat,const TestParams & params)833 std::vector<Move<VkRenderPass>> createCustomRenderPasses(const DeviceInterface &vkd, VkDevice device,
834 VkFormat colorFormat, const TestParams ¶ms)
835 {
836 std::vector<Move<VkRenderPass>> renderPasses;
837 const bool useDependencies = (params.barrierType == BarrierType::DEPENDENCY);
838 const bool twoPipelines = params.needsTwoPipelines();
839 const bool twoSubpasses = (twoPipelines && useDependencies);
840 const uint32_t pipelineCount = (twoPipelines ? 2u : 1u);
841 const uint32_t subpassCount = (twoSubpasses ? 2u : 1u);
842 const uint32_t renderPassCount = ((twoPipelines && !twoSubpasses) ? 2u : 1u);
843
844 const std::vector<VkAttachmentDescription> attachmentDescs = {{
845 0u, // VkAttachmentDescriptionFlags flags;
846 colorFormat, // VkFormat format;
847 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
848 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
849 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
850 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
851 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
852 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
853 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
854 }};
855
856 const std::vector<VkAttachmentReference> attachmentRefs = {{0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}};
857
858 // One or two identical subpasses.
859 const VkSubpassDescription subpassDesc = {
860 0u, // VkSubpassDescriptionFlags flags;
861 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
862 0u, // uint32_t inputAttachmentCount;
863 nullptr, // const VkAttachmentReference* pInputAttachments;
864 static_cast<uint32_t>(attachmentRefs.size()), // uint32_t colorAttachmentCount;
865 de::dataOrNull(attachmentRefs), // const VkAttachmentReference* pColorAttachments;
866 nullptr, // const VkAttachmentReference* pResolveAttachments;
867 nullptr, // const VkAttachmentReference* pDepthStencilAttachment;
868 0u, // uint32_t preserveAttachmentCount;
869 nullptr, // const uint32_t* pPreserveAttachments;
870 };
871
872 const std::vector<VkSubpassDescription> subpassDescs(subpassCount, subpassDesc);
873
874 std::vector<VkSubpassDependency> dependencies;
875 if (fromShaderToShader(params.fromStage, params.toStage) && useDependencies)
876 {
877 const VkSubpassDependency dependency = {
878 0u, // uint32_t srcSubpass;
879 pipelineCount - 1u, // uint32_t dstSubpass;
880 stageToFlags(params.fromStage), // VkPipelineStageFlags srcStageMask;
881 stageToFlags(params.toStage), // VkPipelineStageFlags dstStageMask;
882 writeAccessToFlags(params.writeAccess), // VkAccessFlags srcAccessMask;
883 readAccessToFlags(params.readAccess), // VkAccessFlags dstAccessMask;
884 0u, // VkDependencyFlags dependencyFlags;
885 };
886 dependencies.push_back(dependency);
887 }
888
889 const VkRenderPassCreateInfo createInfo = {
890 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
891 nullptr, // const void* pNext;
892 0u, // VkRenderPassCreateFlags flags;
893 static_cast<uint32_t>(attachmentDescs.size()), // uint32_t attachmentCount;
894 de::dataOrNull(attachmentDescs), // const VkAttachmentDescription* pAttachments;
895 static_cast<uint32_t>(subpassDescs.size()), // uint32_t subpassCount;
896 de::dataOrNull(subpassDescs), // const VkSubpassDescription* pSubpasses;
897 static_cast<uint32_t>(dependencies.size()), // uint32_t dependencyCount;
898 de::dataOrNull(dependencies), // const VkSubpassDependency* pDependencies;
899 };
900
901 for (uint32_t renderPassIdx = 0u; renderPassIdx < renderPassCount; ++renderPassIdx)
902 renderPasses.push_back(createRenderPass(vkd, device, &createInfo));
903
904 return renderPasses;
905 }
906
hostToTransferMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)907 void hostToTransferMemoryBarrier(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
908 {
909 const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
910 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &barrier);
911 }
912
transferToHostMemoryBarrier(const DeviceInterface & vkd,VkCommandBuffer cmdBuffer)913 void transferToHostMemoryBarrier(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
914 {
915 const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
916 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &barrier);
917 }
918
iterate(void)919 tcu::TestStatus MeshShaderSyncInstance::iterate(void)
920 {
921 const auto &vkd = m_context.getDeviceInterface();
922 const auto device = m_context.getDevice();
923 auto &alloc = m_context.getDefaultAllocator();
924 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
925 const auto queue = m_context.getUniversalQueue();
926
927 const auto imageFormat = getImageFormat();
928 const auto imageExtent = getImageExtent();
929 const auto colorBufferUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
930 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
931 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
932 const auto bufferSize = static_cast<VkDeviceSize>(sizeof(m_params.testValue));
933 const auto descriptorType = resourceTypeToDescriptor(m_params.resourceType);
934 const auto resourceStages = m_params.getResourceShaderStages();
935 const auto auxiliarBufferUsage = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
936 const auto useGeneralLayout = m_params.preferGeneralLayout();
937
938 const auto writeAccessFlags = writeAccessToFlags(m_params.writeAccess);
939 const auto readAccessFlags = readAccessToFlags(m_params.readAccess);
940 const auto fromStageFlags = stageToFlags(m_params.fromStage);
941 const auto toStageFlags = stageToFlags(m_params.toStage);
942
943 // Prepare color buffer.
944 const VkImageCreateInfo colorBufferCreateInfo = {
945 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
946 nullptr, // const void* pNext;
947 0u, // VkImageCreateFlags flags;
948 VK_IMAGE_TYPE_2D, // VkImageType imageType;
949 imageFormat, // VkFormat format;
950 imageExtent, // VkExtent3D extent;
951 1u, // uint32_t mipLevels;
952 1u, // uint32_t arrayLayers;
953 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
954 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
955 colorBufferUsage, // VkImageUsageFlags usage;
956 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
957 0u, // uint32_t queueFamilyIndexCount;
958 nullptr, // const uint32_t* pQueueFamilyIndices;
959 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
960 };
961 ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
962 const auto colorBufferView =
963 makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
964
965 // Main resource.
966 using ImageWithMemoryPtr = de::MovePtr<ImageWithMemory>;
967 using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
968
969 ImageWithMemoryPtr imageResource;
970 Move<VkImageView> imageResourceView;
971 VkImageLayout imageDescriptorLayout =
972 (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
973 VkImageLayout currentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
974 BufferWithMemoryPtr bufferResource;
975
976 bool useImageResource = false;
977 bool useBufferResource = false;
978
979 switch (m_params.resourceType)
980 {
981 case ResourceType::UNIFORM_BUFFER:
982 case ResourceType::STORAGE_BUFFER:
983 useBufferResource = true;
984 break;
985 case ResourceType::STORAGE_IMAGE:
986 case ResourceType::SAMPLED_IMAGE:
987 useImageResource = true;
988 break;
989 default:
990 DE_ASSERT(false);
991 break;
992 }
993
994 // One resource needed.
995 DE_ASSERT(useImageResource != useBufferResource);
996
997 if (useImageResource)
998 {
999 const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType);
1000
1001 const VkImageCreateInfo resourceCreateInfo = {
1002 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1003 nullptr, // const void* pNext;
1004 0u, // VkImageCreateFlags flags;
1005 VK_IMAGE_TYPE_2D, // VkImageType imageType;
1006 imageFormat, // VkFormat format;
1007 imageExtent, // VkExtent3D extent;
1008 1u, // uint32_t mipLevels;
1009 1u, // uint32_t arrayLayers;
1010 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1011 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1012 resourceImageUsage, // VkImageUsageFlags usage;
1013 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1014 0u, // uint32_t queueFamilyIndexCount;
1015 nullptr, // const uint32_t* pQueueFamilyIndices;
1016 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1017 };
1018 imageResource =
1019 ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any));
1020 imageResourceView =
1021 makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
1022 }
1023 else
1024 {
1025 const auto resourceBufferUsage = resourceBufferUsageFlags(m_params.resourceType);
1026 const auto resourceBufferCreateInfo = makeBufferCreateInfo(bufferSize, resourceBufferUsage);
1027 bufferResource = BufferWithMemoryPtr(
1028 new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible));
1029 }
1030
1031 Move<VkSampler> sampler;
1032 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
1033 {
1034 const VkSamplerCreateInfo samplerCreateInfo = {
1035 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
1036 nullptr, // const void* pNext;
1037 0u, // VkSamplerCreateFlags flags;
1038 VK_FILTER_NEAREST, // VkFilter magFilter;
1039 VK_FILTER_NEAREST, // VkFilter minFilter;
1040 VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode;
1041 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
1042 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
1043 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
1044 0.0f, // float mipLodBias;
1045 VK_FALSE, // VkBool32 anisotropyEnable;
1046 1.0f, // float maxAnisotropy;
1047 VK_FALSE, // VkBool32 compareEnable;
1048 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp;
1049 0.0f, // float minLod;
1050 0.0f, // float maxLod;
1051 VK_BORDER_COLOR_INT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
1052 VK_FALSE, // VkBool32 unnormalizedCoordinates;
1053 };
1054 sampler = createSampler(vkd, device, &samplerCreateInfo);
1055 }
1056
1057 // Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests.
1058 BufferWithMemoryPtr hostCoherentBuffer;
1059 void *hostCoherentDataPtr = nullptr;
1060 if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage))
1061 {
1062 const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
1063 hostCoherentBuffer =
1064 BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo,
1065 (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
1066 hostCoherentDataPtr = hostCoherentBuffer->getAllocation().getHostPtr();
1067 }
1068
1069 // Descriptor pool.
1070 Move<VkDescriptorPool> descriptorPool;
1071 {
1072 DescriptorPoolBuilder poolBuilder;
1073 poolBuilder.addType(descriptorType);
1074 descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1075 }
1076
1077 // Descriptor set layout.
1078 Move<VkDescriptorSetLayout> setLayout;
1079 {
1080 DescriptorSetLayoutBuilder layoutBuilder;
1081 layoutBuilder.addSingleBinding(descriptorType, resourceStages);
1082 setLayout = layoutBuilder.build(vkd, device);
1083 }
1084
1085 // Descriptor set.
1086 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1087
1088 // Update descriptor set.
1089 {
1090 DescriptorSetUpdateBuilder updateBuilder;
1091 const auto location = DescriptorSetUpdateBuilder::Location::binding(0u);
1092
1093 switch (descriptorType)
1094 {
1095 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1096 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1097 {
1098 const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize);
1099 updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo);
1100 }
1101 break;
1102 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1103 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1104 {
1105 auto descriptorImageInfo =
1106 makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout);
1107 updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo);
1108 }
1109 break;
1110 default:
1111 DE_ASSERT(false);
1112 break;
1113 }
1114
1115 updateBuilder.update(vkd, device);
1116 }
1117
1118 // Render passes and framebuffers.
1119 const auto renderPasses = createCustomRenderPasses(vkd, device, imageFormat, m_params);
1120 const bool multiRenderPass = (renderPasses.size() > 1u);
1121 DE_ASSERT(renderPasses.size() > 0u);
1122
1123 std::vector<Move<VkFramebuffer>> framebuffers;
1124 framebuffers.reserve(renderPasses.size());
1125
1126 for (const auto &renderPass : renderPasses)
1127 framebuffers.push_back(makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width,
1128 imageExtent.height));
1129
1130 // Viewports and scissors.
1131 std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
1132 std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
1133
1134 using PushConstantStruct = TestParams::PushConstantStruct;
1135
1136 // Pipeline layout.
1137 const auto pcSize = static_cast<uint32_t>(sizeof(PushConstantStruct));
1138 const auto pcRange = makePushConstantRange(resourceStages, 0u, pcSize);
1139 const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get(), &pcRange);
1140
1141 // Shader modules, pipelines and pipeline layouts.
1142 const auto twoPipelines = m_params.needsTwoPipelines();
1143 const auto selfDeps = m_params.subpassSelfDependency();
1144
1145 // Both at the same time does not make sense.
1146 DE_ASSERT(!(twoPipelines && selfDeps));
1147
1148 const auto pipelineCount = (twoPipelines ? 2u : 1u);
1149 const auto drawCount = (selfDeps ? 2u : 1u);
1150 const auto iterationCount = std::max(pipelineCount, drawCount);
1151
1152 std::vector<Move<VkPipeline>> pipelines;
1153 pipelines.reserve(pipelineCount);
1154
1155 // Shader modules.
1156 const auto &binaries = m_context.getBinaryCollection();
1157
1158 Move<VkShaderModule> taskShader;
1159 if (m_params.needsTask())
1160 taskShader = createShaderModule(vkd, device, binaries.get("task"));
1161
1162 const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
1163 const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
1164 const auto taskPassthroughShader = createShaderModule(vkd, device, binaries.get("taskPassthrough"));
1165 const auto fragPassthroughShader = createShaderModule(vkd, device, binaries.get("fragPassthrough"));
1166 const auto meshPassthroughShader = createShaderModule(vkd, device, binaries.get("meshPassthrough"));
1167 const auto meshPassthroughWithTaskShader = createShaderModule(vkd, device, binaries.get("meshPassthroughWithTask"));
1168
1169 if (pipelineCount == 1u)
1170 {
1171 // Pipeline.
1172 pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
1173 fragShader.get(), renderPasses.at(0u).get(), viewports, scissors));
1174 }
1175 else if (pipelineCount == 2u)
1176 {
1177 // Mandatory stages in each pipeline: the first pipeline will contain the "from" stage (write) and the second one the "to" stage (read).
1178 const std::vector<Stage> mandatoryStages{m_params.fromStage, m_params.toStage};
1179
1180 // One pipeline per mandatory stage.
1181 for (uint32_t pipelineIdx = 0u; pipelineIdx < pipelineCount; ++pipelineIdx)
1182 {
1183 const auto &stage = mandatoryStages.at(pipelineIdx);
1184
1185 VkShaderModule taskModule = DE_NULL;
1186 VkShaderModule meshModule = DE_NULL;
1187 VkShaderModule fragModule = DE_NULL;
1188
1189 const bool lastSubpass = (pipelineIdx == pipelineCount - 1u);
1190 const auto pipelineStages = subpassStages(stage, lastSubpass);
1191 const bool hasTaskShader = hasTask(pipelineStages);
1192 const bool hasFragShader = hasFrag(pipelineStages);
1193
1194 // Decide which shaders to use for this one.
1195 if (hasTaskShader)
1196 taskModule = ((stage == Stage::TASK) ? taskShader.get() : taskPassthroughShader.get());
1197
1198 if (stage == Stage::MESH)
1199 meshModule = meshShader.get();
1200 else
1201 {
1202 meshModule = (hasTaskShader ? meshPassthroughWithTaskShader.get() : meshPassthroughShader.get());
1203 }
1204
1205 if (hasFragShader)
1206 fragModule = ((stage == Stage::FRAG) ? fragShader.get() : fragPassthroughShader.get());
1207
1208 // Create pipeline. When using multiple render passes, the subpass is always zero. When using a single render pass, each pipeline is prepared for one subpass.
1209 const auto renderPass = (multiRenderPass ? renderPasses.at(pipelineIdx).get() : renderPasses[0].get());
1210 const auto subpass = (multiRenderPass ? 0u : pipelineIdx);
1211
1212 pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskModule, meshModule,
1213 fragModule, renderPass, viewports, scissors, subpass));
1214 }
1215 }
1216 else
1217 {
1218 DE_ASSERT(false);
1219 }
1220
1221 // Command pool and buffer.
1222 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
1223 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1224 const auto cmdBuffer = cmdBufferPtr.get();
1225
1226 beginCommandBuffer(vkd, cmdBuffer);
1227
1228 if (m_params.fromStage == Stage::HOST)
1229 {
1230 // Prepare buffer from host when the source stage is the host.
1231 DE_ASSERT(useBufferResource);
1232
1233 auto &resourceBufferAlloc = bufferResource->getAllocation();
1234 void *resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();
1235
1236 deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
1237 flushAlloc(vkd, device, resourceBufferAlloc);
1238 }
1239 else if (m_params.fromStage == Stage::TRANSFER)
1240 {
1241 // Put value in host-coherent buffer and transfer it to the resource buffer or image.
1242 deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
1243 hostToTransferMemoryBarrier(vkd, cmdBuffer);
1244
1245 if (useBufferResource)
1246 {
1247 const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1248 vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, ©Region);
1249 }
1250 else
1251 {
1252 // Move image to the right layout for transfer.
1253 const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1254 if (newLayout != currentLayout)
1255 {
1256 const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout,
1257 newLayout, imageResource->get(), colorSRR);
1258 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1259 VK_PIPELINE_STAGE_TRANSFER_BIT, &preCopyBarrier);
1260 currentLayout = newLayout;
1261 }
1262 const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1263 vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u,
1264 ©Region);
1265 }
1266 }
1267 else if (isShaderStage(m_params.fromStage))
1268 {
1269 // The image or buffer will be written to from shaders. Images need to be in the right layout.
1270 if (useImageResource)
1271 {
1272 const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
1273 if (newLayout != currentLayout)
1274 {
1275 const auto preWriteBarrier =
1276 makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout,
1277 newLayout, imageResource->get(), colorSRR);
1278 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags,
1279 &preWriteBarrier);
1280 currentLayout = newLayout;
1281 }
1282 }
1283 }
1284 else
1285 {
1286 DE_ASSERT(false);
1287 }
1288
1289 // If the resource is going to be read from shaders and written from a non-shader stage, we'll insert the main barrier before running the pipeline.
1290 if (isShaderStage(m_params.toStage) && !isShaderStage(m_params.fromStage))
1291 {
1292 if (m_params.barrierType == BarrierType::GENERAL)
1293 {
1294 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1295 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
1296 }
1297 else if (m_params.barrierType == BarrierType::SPECIFIC)
1298 {
1299 if (useBufferResource)
1300 {
1301 const auto bufferBarrier =
1302 makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1303 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
1304 }
1305 else
1306 {
1307 const auto newLayout =
1308 (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
1309 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1310 newLayout, imageResource->get(), colorSRR);
1311
1312 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
1313 currentLayout = newLayout;
1314 }
1315 }
1316 // For subpass dependencies, they have already been included in the render pass or loop below.
1317 }
1318
1319 // Run the pipeline.
1320 if (!multiRenderPass)
1321 beginRenderPass(vkd, cmdBuffer, renderPasses[0].get(), framebuffers[0].get(), scissors.at(0), tcu::UVec4(0u));
1322
1323 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
1324 &descriptorSet.get(), 0u, nullptr);
1325
1326 for (uint32_t iterationIdx = 0u; iterationIdx < iterationCount; ++iterationIdx)
1327 {
1328 if (iterationIdx > 0u && !multiRenderPass && twoPipelines)
1329 vkd.cmdNextSubpass(cmdBuffer, VK_SUBPASS_CONTENTS_INLINE);
1330
1331 if (multiRenderPass)
1332 beginRenderPass(vkd, cmdBuffer, renderPasses.at(iterationIdx).get(), framebuffers.at(iterationIdx).get(),
1333 scissors.at(0), tcu::UVec4(0u));
1334
1335 if (twoPipelines || iterationIdx == 0u)
1336 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.at(iterationIdx).get());
1337
1338 PushConstantStruct pcData;
1339 if (selfDeps)
1340 {
1341 // First draw writes, second draw reads.
1342 pcData.writeVal = 1u - iterationIdx;
1343 pcData.readVal = iterationIdx;
1344 }
1345 else
1346 {
1347 // Otherwise reads and writes freely according to the pipeline shaders.
1348 pcData.writeVal = 1u;
1349 pcData.readVal = 1u;
1350 }
1351 vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
1352 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1353
1354 if (multiRenderPass)
1355 endRenderPass(vkd, cmdBuffer);
1356
1357 // If there are self-dependencies or multiple render passes, synchronize resource between draw calls.
1358 if ((multiRenderPass || selfDeps) && iterationIdx == 0u)
1359 {
1360 // In the case of self-dependencies, the barrier type is BarrierType::DEPENDENCY and we'll insert a general barrier because:
1361 // * VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 forbids using buffer barriers inside render passes.
1362 // * VUID-vkCmdPipelineBarrier-image-04073 forbids using image memory barriers inside render passes with resources that are not attachments.
1363 if (m_params.barrierType == BarrierType::GENERAL || m_params.barrierType == BarrierType::DEPENDENCY)
1364 {
1365 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1366 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
1367 }
1368 else if (m_params.barrierType == BarrierType::SPECIFIC)
1369 {
1370 if (useBufferResource)
1371 {
1372 const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags,
1373 bufferResource->get(), 0ull, bufferSize);
1374 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
1375 }
1376 else
1377 {
1378 // Note: the image will only be read from shader stages or from the transfer stage.
1379 DE_ASSERT(useGeneralLayout);
1380 const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
1381 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1382 newLayout, imageResource->get(), colorSRR);
1383
1384 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
1385 currentLayout = newLayout;
1386 }
1387 }
1388 else
1389 {
1390 DE_ASSERT(false);
1391 }
1392
1393 if (multiRenderPass)
1394 {
1395 // Sync color attachment writes.
1396 const auto colorWritesBarrier =
1397 makeMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1398 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1399 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &colorWritesBarrier);
1400 }
1401 }
1402 }
1403
1404 if (!multiRenderPass)
1405 endRenderPass(vkd, cmdBuffer);
1406
1407 // If the resource was written to from shaders and will be read from a non-shader stage, insert the main barrier after running the pipeline.
1408 if (isShaderStage(m_params.fromStage) && !isShaderStage(m_params.toStage))
1409 {
1410 if (m_params.barrierType == BarrierType::GENERAL)
1411 {
1412 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
1413 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
1414 }
1415 else if (m_params.barrierType == BarrierType::SPECIFIC)
1416 {
1417 if (useBufferResource)
1418 {
1419 const auto bufferBarrier =
1420 makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
1421 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
1422 }
1423 else
1424 {
1425 // Note: the image will only be read from shader stages or from the transfer stage.
1426 const auto newLayout =
1427 (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1428 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout,
1429 newLayout, imageResource->get(), colorSRR);
1430
1431 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
1432 currentLayout = newLayout;
1433 }
1434 }
1435 // For subpass dependencies, they have already been included in the render pass and loop.
1436 }
1437
1438 // Read resource from the destination stage if needed.
1439 if (m_params.toStage == Stage::HOST)
1440 {
1441 // Nothing to do. The test value should be in the resource buffer already, which is host-visible.
1442 }
1443 else if (m_params.toStage == Stage::TRANSFER)
1444 {
1445 // Copy value from resource to host-coherent buffer to be verified later.
1446 if (useBufferResource)
1447 {
1448 const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
1449 vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, ©Region);
1450 }
1451 else
1452 {
1453 const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1454 vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u,
1455 ©Region);
1456 }
1457
1458 transferToHostMemoryBarrier(vkd, cmdBuffer);
1459 }
1460
1461 // If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
1462 BufferWithMemoryPtr colorVerificationBuffer;
1463 void *colorVerificationDataPtr = nullptr;
1464
1465 if (valueInColorBuffer(m_params.toStage))
1466 {
1467 const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
1468 colorVerificationBuffer =
1469 BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo,
1470 (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
1471 colorVerificationDataPtr = colorVerificationBuffer->getAllocation().getHostPtr();
1472
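        // Make the color attachment writes available and move the color buffer to TRANSFER_SRC_OPTIMAL before copying it out.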
1473 const auto srcAccess = (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
1474 const auto dstAccess = VK_ACCESS_TRANSFER_READ_BIT;
1475 const auto colorBarrier =
1476 makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1477 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
1478 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1479 VK_PIPELINE_STAGE_TRANSFER_BIT, &colorBarrier);
1480
1481 const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
1482 vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1483                                  colorVerificationBuffer->get(), 1u, &copyRegion);
1484
1485 transferToHostMemoryBarrier(vkd, cmdBuffer);
1486 }
1487
1488 endCommandBuffer(vkd, cmdBuffer);
1489 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1490
1491 // Verify output resources as needed.
1492
1493 if (valueInAuxiliarDestBuffer(m_params.toStage))
1494 {
1495 uint32_t bufferValue;
1496 deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue));
1497
1498 if (bufferValue != m_params.testValue)
1499 {
1500 std::ostringstream msg;
1501 msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected "
1502 << m_params.testValue;
1503 TCU_FAIL(msg.str());
1504 }
1505 }
1506
1507 if (valueInResourceBuffer(m_params.toStage))
1508 {
1509 auto &resourceBufferAlloc = bufferResource->getAllocation();
1510 void *resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();
1511 uint32_t bufferValue;
1512
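        // The resource buffer allocation is host-visible but not necessarily coherent, so invalidate it before reading.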
1513 invalidateAlloc(vkd, device, resourceBufferAlloc);
1514 deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue));
1515
1516 if (bufferValue != m_params.testValue)
1517 {
1518 std::ostringstream msg;
1519 msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected "
1520 << m_params.testValue;
1521 TCU_FAIL(msg.str());
1522 }
1523 }
1524
1525 if (valueInColorBuffer(m_params.toStage))
1526 {
1527 uint32_t bufferValue;
1528 deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue));
1529
1530 if (bufferValue != m_params.testValue)
1531 {
1532 std::ostringstream msg;
1533 msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected "
1534 << m_params.testValue;
1535 TCU_FAIL(msg.str());
1536 }
1537 }
1538
1539 return tcu::TestStatus::pass("Pass");
1540 }
1541
1542 // Specific test to check a barrier that crosses secondary command buffers and goes from compute to task.
1543 class BarrierAcrossSecondaryCase : public vkt::TestCase
1544 {
1545 public:
1546     BarrierAcrossSecondaryCase(tcu::TestContext &testCtx, const std::string &name) : vkt::TestCase(testCtx, name)
1547 {
1548 }
1549     virtual ~BarrierAcrossSecondaryCase(void)
1550 {
1551 }
1552
1553 void checkSupport(Context &context) const override;
1554 TestInstance *createInstance(Context &context) const override;
1555 void initPrograms(vk::SourceCollections &programCollection) const override;
1556
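    // Workgroup size and workgroup count shared by the compute dispatch and the task shader draw.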
1557 static constexpr uint32_t kLocalSize = 128u;
1558 static constexpr uint32_t kNumWorkGroups = 16384u;
1559 };
1560
1561 class BarrierAcrossSecondaryInstance : public vkt::TestInstance
1562 {
1563 public:
1564     BarrierAcrossSecondaryInstance(Context &context) : vkt::TestInstance(context)
1565 {
1566 }
1567     virtual ~BarrierAcrossSecondaryInstance(void)
1568 {
1569 }
1570
1571 tcu::TestStatus iterate(void) override;
1572 };
1573
1574 void BarrierAcrossSecondaryCase::checkSupport(Context &context) const
1575 {
1576 checkTaskMeshShaderSupportEXT(context, true, true);
1577 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1578 }
1579
1580 TestInstance *BarrierAcrossSecondaryCase::createInstance(Context &context) const
1581 {
1582 return new BarrierAcrossSecondaryInstance(context);
1583 }
1584
1585 void BarrierAcrossSecondaryCase::initPrograms(vk::SourceCollections &programCollection) const
1586 {
1587 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1588
1589 const std::string descriptorDecl = "layout (set=0, binding=0, std430) buffer OutputBlock {\n"
1590 " uint values[];\n"
1591 "} outBuffer;\n"
1592 "layout (set=0, binding=1, std430) buffer VerificationBlock {\n"
1593 " uint values[];\n"
1594 "} verificationBuffer;\n";
1595
1596 // The compute shader will fill the output buffer.
1597 std::ostringstream comp;
1598 comp << "#version 450\n"
1599 << "layout(local_size_x=" << kLocalSize << ") in;\n"
1600 << descriptorDecl << "void main ()\n"
1601 << "{\n"
1602 << " outBuffer.values[gl_GlobalInvocationID.x] = gl_GlobalInvocationID.x;\n"
1603 << "}\n";
1604 programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());
1605
1606     // The task shader will read the output buffer, verify its contents and write the per-invocation results to the verification buffer.
1607 std::ostringstream task;
1608 task << "#version 450\n"
1609 << "#extension GL_EXT_mesh_shader : enable\n"
1610 << "layout(local_size_x=" << kLocalSize << ") in;\n"
1611 << descriptorDecl << "void main ()\n"
1612 << "{\n"
1613 << " const uint verifResult = ((outBuffer.values[gl_GlobalInvocationID.x] == gl_GlobalInvocationID.x) ? 1u "
1614 ": 0u);\n"
1615 << " verificationBuffer.values[gl_GlobalInvocationID.x] = verifResult;\n"
1616 << " EmitMeshTasksEXT(0u, 0u, 0u);\n"
1617 << "}\n";
1618 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1619
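    // The mesh shader is a no-op: it emits no vertices and no primitives.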
1620 std::ostringstream mesh;
1621 mesh << "#version 450\n"
1622 << "#extension GL_EXT_mesh_shader : enable\n"
1623 << "\n"
1624 << "layout(local_size_x=1) in;\n"
1625 << "layout(triangles) out;\n"
1626 << "layout(max_vertices=3, max_primitives=1) out;\n"
1627 << "\n"
1628 << "void main ()\n"
1629 << "{\n"
1630 << " SetMeshOutputsEXT(0u, 0u);\n"
1631 << "}\n";
1632 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1633 }
1634
1635 tcu::TestStatus BarrierAcrossSecondaryInstance::iterate(void)
1636 {
1637 const auto &vkd = m_context.getDeviceInterface();
1638 const auto device = m_context.getDevice();
1639 auto &alloc = m_context.getDefaultAllocator();
1640 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
1641 const auto queue = m_context.getUniversalQueue();
1642 const auto kLocalSize = BarrierAcrossSecondaryCase::kLocalSize;
1643 const auto kNumWorkGroups = BarrierAcrossSecondaryCase::kNumWorkGroups;
1644 const auto bindingStages = (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_TASK_BIT_EXT);
1645 const auto extent = makeExtent3D(1u, 1u, 1u);
1646
1647 // Output buffer.
1648 const auto outputBufferSize = static_cast<VkDeviceSize>(kLocalSize * kNumWorkGroups * sizeof(uint32_t));
1649 const auto outputBufferInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1650 BufferWithMemory outputBuffer(vkd, device, alloc, outputBufferInfo, MemoryRequirement::HostVisible);
1651 auto &outputBufferAlloc = outputBuffer.getAllocation();
1652 void *outputBufferData = outputBufferAlloc.getHostPtr();
1653
1654 // Verification buffer.
1655 const auto verificationBufferSize = outputBufferSize;
1656 const auto verificationBufferInfo = outputBufferInfo;
1657 BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1658 auto &verificationBufferAlloc = verificationBuffer.getAllocation();
1659 void *verificationBufferData = verificationBufferAlloc.getHostPtr();
1660
1661 // Prepare buffer data.
1662 deMemset(outputBufferData, 0, static_cast<size_t>(outputBufferSize));
1663 deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
1664 flushAlloc(vkd, device, outputBufferAlloc);
1665 flushAlloc(vkd, device, verificationBufferAlloc);
1666
1667 // Descriptor set layout.
1668 DescriptorSetLayoutBuilder setLayoutBuilder;
1669 setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
1670 setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
1671 const auto setLayout = setLayoutBuilder.build(vkd, device);
1672
1673 // Pipeline layout.
1674 const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
1675
1676 // Descriptor pool and set.
1677 DescriptorPoolBuilder poolBuilder;
1678 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u);
1679 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1680 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1681
1682 // Update descriptor set.
1683 DescriptorSetUpdateBuilder updateBuilder;
1684 const auto outputBufferDescInfo = makeDescriptorBufferInfo(outputBuffer.get(), 0ull, outputBufferSize);
1685 const auto verificationBufferDescInfo =
1686 makeDescriptorBufferInfo(verificationBuffer.get(), 0ull, verificationBufferSize);
1687 updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1688 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescInfo);
1689 updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1690 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verificationBufferDescInfo);
1691 updateBuilder.update(vkd, device);
1692
1693 // Graphics pipeline auxiliary data.
1694 const auto renderPass = makeRenderPass(vkd, device);
1695 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, extent.width, extent.height);
1696 const std::vector<VkViewport> viewports(1u, makeViewport(extent));
1697 const std::vector<VkRect2D> scissors(1u, makeRect2D(extent));
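    // The render pass and framebuffer have no attachments: the draw only needs to run the task shader.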
1698
1699 // Create pipelines.
1700 const auto &binaries = m_context.getBinaryCollection();
1701 const auto compModule = createShaderModule(vkd, device, binaries.get("comp"));
1702 const auto taskModule = createShaderModule(vkd, device, binaries.get("task"));
1703 const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
1704
1705 const auto computePipeline = makeComputePipeline(vkd, device, pipelineLayout.get(), compModule.get());
1706 const auto meshPipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskModule.get(),
1707 meshModule.get(), DE_NULL, renderPass.get(), viewports, scissors);
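    // Note: no fragment shader module is provided; the test does not rely on any rasterization output.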
1708
1709 // Command pool and command buffers.
1710 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
1711 const auto primaryCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1712 const auto compCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1713 const auto meshCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1714
1715 // Use compute pipeline and record barrier to task shader.
1716 {
1717 const auto cmdBuffer = compCmdBuffer.get();
1718 const auto comp2TaskBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
1719
1720 beginSecondaryCommandBuffer(vkd, cmdBuffer);
1721 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1u,
1722 &descriptorSet.get(), 0u, nullptr);
1723 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
1724 vkd.cmdDispatch(cmdBuffer, kNumWorkGroups, 1u, 1u);
1725 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1726 VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, &comp2TaskBarrier);
1727 endCommandBuffer(vkd, cmdBuffer);
1728 }
1729
1730 // Use mesh pipeline and record barrier to host.
1731 {
1732 const auto cmdBuffer = meshCmdBuffer.get();
1733
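        // This secondary command buffer will execute inside the render pass, so it is begun with the render pass and framebuffer.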
1734 beginSecondaryCommandBuffer(vkd, cmdBuffer, renderPass.get(), framebuffer.get());
1735 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
1736 &descriptorSet.get(), 0u, nullptr);
1737 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
1738 vkd.cmdDrawMeshTasksEXT(cmdBuffer, kNumWorkGroups, 1u, 1u);
1739 endCommandBuffer(vkd, cmdBuffer);
1740 }
1741
1742 // Use both secondary command buffers.
1743 {
1744 const auto cmdBuffer = primaryCmdBuffer.get();
1745 const auto task2HostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1746
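        // The compute-to-task barrier lives in the first secondary command buffer, so synchronization must hold across the boundary between the two secondaries.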
1747 beginCommandBuffer(vkd, cmdBuffer);
1748 vkd.cmdExecuteCommands(cmdBuffer, 1u, &compCmdBuffer.get());
1749 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u),
1750 VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
1751 vkd.cmdExecuteCommands(cmdBuffer, 1u, &meshCmdBuffer.get());
1752 endRenderPass(vkd, cmdBuffer);
1753 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_HOST_BIT,
1754 &task2HostBarrier);
1755 endCommandBuffer(vkd, cmdBuffer);
1756 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1757 }
1758
1759 // Verify buffer contents.
1760 invalidateAlloc(vkd, device, verificationBufferAlloc);
1761 const std::vector<uint32_t> expectedResult(kNumWorkGroups * kLocalSize, 1u);
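    // Every task shader invocation should have written 1u after observing the values produced by the compute dispatch.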
1762
1763 if (deMemCmp(expectedResult.data(), verificationBufferData, de::dataSize(expectedResult)) != 0)
1764 TCU_FAIL("Unexpected values found in verification buffer");
1765
1766 return tcu::TestStatus::pass("Pass");
1767 }
1768
1769 } // namespace
1770
1771 tcu::TestCaseGroup *createMeshShaderSyncTestsEXT(tcu::TestContext &testCtx)
1772 {
1773 const struct
1774 {
1775 Stage fromStage;
1776 Stage toStage;
1777 } stageCombinations[] = {
1778 // Combinations where the source and destination stages involve mesh shaders.
1779 // Note: this could be tested procedurally.
1780 {Stage::HOST, Stage::TASK},
1781 {Stage::HOST, Stage::MESH},
1782 {Stage::TRANSFER, Stage::TASK},
1783 {Stage::TRANSFER, Stage::MESH},
1784 {Stage::TASK, Stage::MESH},
1785 {Stage::TASK, Stage::FRAG},
1786 {Stage::TASK, Stage::TRANSFER},
1787 {Stage::TASK, Stage::HOST},
1788 {Stage::MESH, Stage::FRAG},
1789 {Stage::MESH, Stage::TRANSFER},
1790 {Stage::MESH, Stage::HOST},
1791
1792 // These require two pipelines.
1793 {Stage::MESH, Stage::TASK},
1794 {Stage::FRAG, Stage::TASK},
1795 {Stage::FRAG, Stage::MESH},
1796 };
1797
1798 const struct
1799 {
1800 ResourceType resourceType;
1801 const char *name;
1802 } resourceTypes[] = {
1803 {ResourceType::UNIFORM_BUFFER, "uniform_buffer"},
1804 {ResourceType::STORAGE_BUFFER, "storage_buffer"},
1805 {ResourceType::STORAGE_IMAGE, "storage_image"},
1806 {ResourceType::SAMPLED_IMAGE, "sampled_image"},
1807 };
1808
1809 const struct
1810 {
1811 BarrierType barrierType;
1812 const char *name;
1813 } barrierTypes[] = {
1814 {BarrierType::GENERAL, "memory_barrier"},
1815 {BarrierType::SPECIFIC, "specific_barrier"},
1816 {BarrierType::DEPENDENCY, "subpass_dependency"},
1817 };
1818
1819 const struct
1820 {
1821 WriteAccess writeAccess;
1822 const char *name;
1823 } writeAccesses[] = {
1824 {WriteAccess::HOST_WRITE, "host_write"},
1825 {WriteAccess::TRANSFER_WRITE, "transfer_write"},
1826 {WriteAccess::SHADER_WRITE, "shader_write"},
1827 };
1828
1829 const struct
1830 {
1831 ReadAccess readAccess;
1832 const char *name;
1833 } readAccesses[] = {
1834 {ReadAccess::HOST_READ, "host_read"},
1835 {ReadAccess::TRANSFER_READ, "transfer_read"},
1836 {ReadAccess::SHADER_READ, "shader_read"},
1837 {ReadAccess::UNIFORM_READ, "uniform_read"},
1838 };
1839
1840 uint32_t testValue = 1628510124u;
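    // Arbitrary starting value; it is incremented per generated case so every test writes and checks a distinct value.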
1841
1842 GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "synchronization"));
1843
1844 for (const auto &stageCombination : stageCombinations)
1845 {
1846 const std::string combinationName =
1847 de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
1848 GroupPtr combinationGroup(new tcu::TestCaseGroup(testCtx, combinationName.c_str()));
1849
1850 for (const auto &resourceCase : resourceTypes)
1851 {
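            // Skip resource types that the source stage cannot write to or the destination stage cannot read from.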
1852 if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
1853 continue;
1854
1855 if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
1856 continue;
1857
1858 GroupPtr resourceGroup(new tcu::TestCaseGroup(testCtx, resourceCase.name));
1859
1860 for (const auto &barrierCase : barrierTypes)
1861 {
1862 const auto shaderToShader = fromShaderToShader(stageCombination.fromStage, stageCombination.toStage);
1863 const auto barrierIsDependency = (barrierCase.barrierType == BarrierType::DEPENDENCY);
1864
1865 // Subpass dependencies can only be used in shader to shader situations.
1866 if (barrierIsDependency && !shaderToShader)
1867 continue;
1868
1869 GroupPtr barrierGroup(new tcu::TestCaseGroup(testCtx, barrierCase.name));
1870
1871 for (const auto &writeCase : writeAccesses)
1872 for (const auto &readCase : readAccesses)
1873 {
1874 if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
1875 continue;
1876 if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
1877 continue;
1878 if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
1879 continue;
1880 if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
1881 continue;
1882
1883 const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;
1884
1885 const TestParams testParams = {
1886 stageCombination.fromStage, // Stage fromStage;
1887 stageCombination.toStage, // Stage toStage;
1888 resourceCase.resourceType, // ResourceType resourceType;
1889 barrierCase.barrierType, // BarrierType barrierType;
1890 writeCase.writeAccess, // WriteAccess writeAccess;
1891 readCase.readAccess, // ReadAccess readAccess;
1892 testValue++, // uint32_t testValue;
1893 };
1894
1895 barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, testParams));
1896 }
1897
1898 resourceGroup->addChild(barrierGroup.release());
1899 }
1900
1901 combinationGroup->addChild(resourceGroup.release());
1902 }
1903
1904 mainGroup->addChild(combinationGroup.release());
1905 }
1906
1907 {
1908 // Additional synchronization tests
1909 GroupPtr otherGroup(new tcu::TestCaseGroup(testCtx, "other"));
1910
1911 // Check synchronizing compute to task across secondary command buffer boundaries
1912 otherGroup->addChild(new BarrierAcrossSecondaryCase(testCtx, "barrier_across_secondary"));
1913
1914 mainGroup->addChild(otherGroup.release());
1915 }
1916
1917 return mainGroup.release();
1918 }
1919
1920 } // namespace MeshShader
1921 } // namespace vkt
1922