1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017-2019 The Khronos Group Inc.
6  * Copyright (c) 2018-2019 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Vulkan Memory Model tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMemoryModelTests.hpp"
26 #include "vktMemoryModelPadding.hpp"
27 #include "vktMemoryModelSharedLayout.hpp"
28 #include "vktAmberTestCase.hpp"
29 
30 #include "vkBufferWithMemory.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkQueryUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 #include "vktTestCase.hpp"
39 
40 #include "deDefs.h"
41 #include "deMath.h"
42 #include "deSharedPtr.hpp"
43 #include "deString.h"
44 
45 #include "tcuTestCase.hpp"
46 #include "tcuTestLog.hpp"
47 
48 #include <string>
49 #include <sstream>
50 
51 namespace vkt
52 {
53 namespace MemoryModel
54 {
55 namespace
56 {
57 using namespace vk;
58 using namespace std;
59 
60 typedef enum
61 {
62     TT_MP = 0, // message passing
63     TT_WAR,    // write-after-read hazard
64 } TestType;
65 
66 typedef enum
67 {
68     ST_FENCE_FENCE = 0,
69     ST_FENCE_ATOMIC,
70     ST_ATOMIC_FENCE,
71     ST_ATOMIC_ATOMIC,
72     ST_CONTROL_BARRIER,
73     ST_CONTROL_AND_MEMORY_BARRIER,
74 } SyncType;
75 
76 typedef enum
77 {
78     SC_BUFFER = 0,
79     SC_IMAGE,
80     SC_WORKGROUP,
81     SC_PHYSBUFFER,
82 } StorageClass;
83 
84 typedef enum
85 {
86     SCOPE_DEVICE = 0,
87     SCOPE_QUEUEFAMILY,
88     SCOPE_WORKGROUP,
89     SCOPE_SUBGROUP,
90 } Scope;
91 
92 typedef enum
93 {
94     STAGE_COMPUTE = 0,
95     STAGE_VERTEX,
96     STAGE_FRAGMENT,
97 } Stage;
98 
99 typedef enum
100 {
101     DATA_TYPE_UINT = 0,
102     DATA_TYPE_UINT64,
103     DATA_TYPE_FLOAT32,
104     DATA_TYPE_FLOAT64,
105 } DataType;
106 
107 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
108 const VkFlags allPipelineStages =
109     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
110 
111 struct CaseDef
112 {
113     bool payloadMemLocal;
114     bool guardMemLocal;
115     bool coherent;
116     bool core11;
117     bool atomicRMW;
118     TestType testType;
119     StorageClass payloadSC;
120     StorageClass guardSC;
121     Scope scope;
122     SyncType syncType;
123     Stage stage;
124     DataType dataType;
125     bool transitive;
126     bool transitiveVis;
127 };
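
// Illustrative only: one possible CaseDef for a device-scope message-passing
// test using buffer payload/guard, fence+fence synchronization in a compute
// shader with 32-bit uint data (field order follows the struct above):
//
//   const CaseDef exampleCase = {
//       true,            // payloadMemLocal
//       true,            // guardMemLocal
//       true,            // coherent
//       false,           // core11
//       false,           // atomicRMW
//       TT_MP,           // testType
//       SC_BUFFER,       // payloadSC
//       SC_BUFFER,       // guardSC
//       SCOPE_DEVICE,    // scope
//       ST_FENCE_FENCE,  // syncType
//       STAGE_COMPUTE,   // stage
//       DATA_TYPE_UINT,  // dataType
//       false,           // transitive
//       false,           // transitiveVis
//   };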
128 
129 class MemoryModelTestInstance : public TestInstance
130 {
131 public:
132     MemoryModelTestInstance(Context &context, const CaseDef &data);
133     ~MemoryModelTestInstance(void);
134     tcu::TestStatus iterate(void);
135 
136 private:
137     CaseDef m_data;
138 
139     enum
140     {
141         WIDTH  = 256,
142         HEIGHT = 256
143     };
144 };
145 
146 MemoryModelTestInstance::MemoryModelTestInstance(Context &context, const CaseDef &data)
147     : vkt::TestInstance(context)
148     , m_data(data)
149 {
150 }
151 
152 MemoryModelTestInstance::~MemoryModelTestInstance(void)
153 {
154 }
155 
156 class MemoryModelTestCase : public TestCase
157 {
158 public:
159     MemoryModelTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
160     ~MemoryModelTestCase(void);
161     virtual void initPrograms(SourceCollections &programCollection) const;
162     virtual void initProgramsTransitive(SourceCollections &programCollection) const;
163     virtual TestInstance *createInstance(Context &context) const;
164     virtual void checkSupport(Context &context) const;
165 
166 private:
167     CaseDef m_data;
168 };
169 
170 MemoryModelTestCase::MemoryModelTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
171     : vkt::TestCase(context, name)
172     , m_data(data)
173 {
174 }
175 
176 MemoryModelTestCase::~MemoryModelTestCase(void)
177 {
178 }
179 
180 void MemoryModelTestCase::checkSupport(Context &context) const
181 {
182     if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
183     {
184         TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
185     }
186 
187     if (!m_data.core11)
188     {
189         if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
190         {
191             TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
192         }
193 
194         if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
195         {
196             TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
197         }
198     }
199 
200     if (m_data.scope == SCOPE_SUBGROUP)
201     {
202         // Check for subgroup support for scope_subgroup tests.
203         VkPhysicalDeviceSubgroupProperties subgroupProperties;
204         subgroupProperties.sType               = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
205         subgroupProperties.pNext               = DE_NULL;
206         subgroupProperties.supportedOperations = 0;
207 
208         VkPhysicalDeviceProperties2 properties;
209         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
210         properties.pNext = &subgroupProperties;
211 
212         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
213 
214         if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
215             !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
216             !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
217         {
218             TCU_THROW(NotSupportedError, "Subgroup features not supported");
219         }
220 
221         VkShaderStageFlags stage = VK_SHADER_STAGE_COMPUTE_BIT;
222         if (m_data.stage == STAGE_VERTEX)
223         {
224             stage = VK_SHADER_STAGE_VERTEX_BIT;
225         }
226         else if (m_data.stage == STAGE_COMPUTE)
227         {
228             stage = VK_SHADER_STAGE_COMPUTE_BIT;
229         }
230         else if (m_data.stage == STAGE_FRAGMENT)
231         {
232             stage = VK_SHADER_STAGE_FRAGMENT_BIT;
233         }
234 
235         if ((subgroupProperties.supportedStages & stage) == 0)
236         {
237             TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
238         }
239     }
240     if (m_data.dataType == DATA_TYPE_UINT64)
241     {
242         if (!context.getDeviceFeatures().shaderInt64)
243         {
244             TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
245         }
246         if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
247             (m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER))
248         {
249             TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
250         }
251         if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics && m_data.guardSC == SC_WORKGROUP)
252         {
253             TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
254         }
255     }
256 
257     if (m_data.dataType == DATA_TYPE_FLOAT32)
258     {
259         if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
260             TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
261 
262         if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
263             (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics))
264         {
265             TCU_THROW(NotSupportedError,
266                       "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
267         }
268 
269         if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics))
270         {
271             TCU_THROW(NotSupportedError,
272                       "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported");
273         }
274 
275         if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics))
276         {
277             TCU_THROW(NotSupportedError,
278                       "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
279         }
280     }
281 
282     if (m_data.dataType == DATA_TYPE_FLOAT64)
283     {
284         if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
285             TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
286 
287         if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
288             (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics))
289         {
290             TCU_THROW(NotSupportedError,
291                       "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
292         }
293 
294         if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE)
295         {
296             TCU_THROW(NotSupportedError,
297                       "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported");
298         }
299 
300         if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics))
301         {
302             TCU_THROW(NotSupportedError,
303                       "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
304         }
305     }
306 
307     if (m_data.transitive && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains)
308         TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported");
309 
310     if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
311         !context.isBufferDeviceAddressSupported())
312         TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
313 
314     if (m_data.stage == STAGE_VERTEX)
315     {
316         if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
317         {
318             TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
319         }
320     }
321     if (m_data.stage == STAGE_FRAGMENT)
322     {
323         if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
324         {
325             TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
326         }
327     }
328 }
329 
330 void MemoryModelTestCase::initPrograms(SourceCollections &programCollection) const
331 {
332     if (m_data.transitive)
333     {
334         initProgramsTransitive(programCollection);
335         return;
336     }
337     DE_ASSERT(!m_data.transitiveVis);
338 
339     Scope invocationMapping = m_data.scope;
340     if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
341         (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
342     {
343         invocationMapping = SCOPE_WORKGROUP;
344     }
345 
346     const char *scopeStr;
347     switch (m_data.scope)
348     {
349     default:
350         DE_ASSERT(0); // fall through
351     case SCOPE_DEVICE:
352         scopeStr = "gl_ScopeDevice";
353         break;
354     case SCOPE_QUEUEFAMILY:
355         scopeStr = "gl_ScopeQueueFamily";
356         break;
357     case SCOPE_WORKGROUP:
358         scopeStr = "gl_ScopeWorkgroup";
359         break;
360     case SCOPE_SUBGROUP:
361         scopeStr = "gl_ScopeSubgroup";
362         break;
363     }
364 
365     const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64)  ? "uint64_t" :
366                           (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
367                           (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" :
368                                                                    "uint";
369     const bool intType  = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
370 
371     // Construct storageSemantics strings. Both release and acquire
372     // always have the payload storage class. They only include the
373     // guard storage class if they're using FENCE for that side of the
374     // sync.
375     std::stringstream storageSemanticsRelease;
376     switch (m_data.payloadSC)
377     {
378     default:
379         DE_ASSERT(0);   // fall through
380     case SC_PHYSBUFFER: // fall through
381     case SC_BUFFER:
382         storageSemanticsRelease << "gl_StorageSemanticsBuffer";
383         break;
384     case SC_IMAGE:
385         storageSemanticsRelease << "gl_StorageSemanticsImage";
386         break;
387     case SC_WORKGROUP:
388         storageSemanticsRelease << "gl_StorageSemanticsShared";
389         break;
390     }
391     std::stringstream storageSemanticsAcquire;
392     storageSemanticsAcquire << storageSemanticsRelease.str();
393     if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
394     {
395         switch (m_data.guardSC)
396         {
397         default:
398             DE_ASSERT(0);   // fall through
399         case SC_PHYSBUFFER: // fall through
400         case SC_BUFFER:
401             storageSemanticsRelease << " | gl_StorageSemanticsBuffer";
402             break;
403         case SC_IMAGE:
404             storageSemanticsRelease << " | gl_StorageSemanticsImage";
405             break;
406         case SC_WORKGROUP:
407             storageSemanticsRelease << " | gl_StorageSemanticsShared";
408             break;
409         }
410     }
411     if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
412     {
413         switch (m_data.guardSC)
414         {
415         default:
416             DE_ASSERT(0);   // fall through
417         case SC_PHYSBUFFER: // fall through
418         case SC_BUFFER:
419             storageSemanticsAcquire << " | gl_StorageSemanticsBuffer";
420             break;
421         case SC_IMAGE:
422             storageSemanticsAcquire << " | gl_StorageSemanticsImage";
423             break;
424         case SC_WORKGROUP:
425             storageSemanticsAcquire << " | gl_StorageSemanticsShared";
426             break;
427         }
428     }
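
    // For illustration (assuming payloadSC == SC_BUFFER and guardSC == SC_IMAGE):
    // with ST_FENCE_FENCE both strings come out as
    //   "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage",
    // while with ST_ATOMIC_ATOMIC neither side uses a fence and both strings
    // stay "gl_StorageSemanticsBuffer" (the payload storage class only).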
429 
430     std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
431 
432     semanticsRelease << "gl_SemanticsRelease";
433     semanticsAcquire << "gl_SemanticsAcquire";
434     semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
435     if (!m_data.coherent && m_data.testType != TT_WAR)
436     {
437         DE_ASSERT(!m_data.core11);
438         semanticsRelease << " | gl_SemanticsMakeAvailable";
439         semanticsAcquire << " | gl_SemanticsMakeVisible";
440         semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
441     }
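
    // E.g. for a non-coherent TT_MP case, semanticsRelease ends up as
    // "gl_SemanticsRelease | gl_SemanticsMakeAvailable" and semanticsAcquire as
    // "gl_SemanticsAcquire | gl_SemanticsMakeVisible"; coherent (or TT_WAR)
    // cases keep the plain release/acquire semantics.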
442 
443     std::stringstream css;
444     css << "#version 450 core\n";
445     if (!m_data.core11)
446     {
447         css << "#pragma use_vulkan_memory_model\n";
448     }
449     if (!intType)
450     {
451         css << "#extension GL_EXT_shader_atomic_float : enable\n"
452                "#extension GL_KHR_memory_scope_semantics : enable\n";
453     }
454     css << "#extension GL_KHR_shader_subgroup_basic : enable\n"
455            "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
456            "#extension GL_KHR_shader_subgroup_ballot : enable\n"
457            "#extension GL_KHR_memory_scope_semantics : enable\n"
458            "#extension GL_ARB_gpu_shader_int64 : enable\n"
459            "#extension GL_EXT_buffer_reference : enable\n"
460            "// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
461            "layout(constant_id = 0) const int DIM = 1;\n"
462            "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
463            "struct S { "
464         << typeStr << " x[DIM*DIM]; };\n";
465 
466     if (m_data.stage == STAGE_COMPUTE)
467     {
468         css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
469     }
470 
471     const char *memqual = "";
472     if (m_data.coherent)
473     {
474         if (m_data.core11)
475         {
476             // Vulkan 1.1 only has "coherent", use it regardless of scope
477             memqual = "coherent";
478         }
479         else
480         {
481             switch (m_data.scope)
482             {
483             default:
484                 DE_ASSERT(0); // fall through
485             case SCOPE_DEVICE:
486                 memqual = "devicecoherent";
487                 break;
488             case SCOPE_QUEUEFAMILY:
489                 memqual = "queuefamilycoherent";
490                 break;
491             case SCOPE_WORKGROUP:
492                 memqual = "workgroupcoherent";
493                 break;
494             case SCOPE_SUBGROUP:
495                 memqual = "subgroupcoherent";
496                 break;
497             }
498         }
499     }
500     else
501     {
502         DE_ASSERT(!m_data.core11);
503         memqual = "nonprivate";
504     }
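
    // For example, with the memory model pragma a coherent SCOPE_WORKGROUP case
    // uses "workgroupcoherent", a Vulkan 1.1 (core11) case falls back to plain
    // "coherent", and every non-coherent case uses "nonprivate" and relies on
    // explicit availability/visibility operations in the barriers instead.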
505 
506     stringstream pushConstMembers;
507 
508     // Declare payload, guard, and fail resources
509     switch (m_data.payloadSC)
510     {
511     default:
512         DE_ASSERT(0); // fall through
513     case SC_PHYSBUFFER:
514         css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
515         pushConstMembers << "   layout(offset = 0) PayloadRef payloadref;\n";
516         break;
517     case SC_BUFFER:
518         css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n";
519         break;
520     case SC_IMAGE:
521         if (intType)
522             css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
523         else
524             css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
525         break;
526     case SC_WORKGROUP:
527         css << "shared S payload;\n";
528         break;
529     }
530     if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
531     {
532         // The guard variable is only accessed with atomics and need not be declared coherent.
533         switch (m_data.guardSC)
534         {
535         default:
536             DE_ASSERT(0); // fall through
537         case SC_PHYSBUFFER:
538             css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
539             pushConstMembers << "layout(offset = 8) GuardRef guard;\n";
540             break;
541         case SC_BUFFER:
542             css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n";
543             break;
544         case SC_IMAGE:
545             if (intType)
546                 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
547             else
548                 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
549             break;
550         case SC_WORKGROUP:
551             css << "shared S guard;\n";
552             break;
553         }
554     }
555 
556     css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
557 
558     if (pushConstMembers.str().size() != 0)
559     {
560         css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
561     }
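
    // As a sketch, a coherent device-scope uint case with buffer payload and
    // guard emits declarations along these lines:
    //   layout(set=0, binding=0) devicecoherent buffer Payload { uint x[]; } payload;
    //   layout(set=0, binding=1) buffer Guard { uint x[]; } guard;
    //   layout(set=0, binding=2) buffer Fail { uint x[]; } fail;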
562 
563     css << "void main()\n"
564            "{\n"
565            "   bool pass = true;\n"
566            "   bool skip = false;\n";
567 
568     if (m_data.payloadSC == SC_PHYSBUFFER)
569         css << "   " << memqual << " PayloadRef payload = payloadref;\n";
570 
571     if (m_data.stage == STAGE_FRAGMENT)
572     {
573         // Kill helper invocations so they don't load outside the bounds of the SSBO.
574         // Helper pixels are also initially "active" and if a thread gets one as its
575         // partner in SCOPE_SUBGROUP mode, it can't run the test.
576         css << "   if (gl_HelperInvocation) { return; }\n";
577     }
578 
579     // Compute coordinates based on the storage class and scope.
580     // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
581     // For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
582     // For subgroup scope, we pair up LocalInvocationID and the LocalInvocationID of the invocation at gl_SubgroupInvocationID^(gl_SubgroupSize-1).
583     switch (invocationMapping)
584     {
585     default:
586         DE_ASSERT(0); // fall through
587     case SCOPE_SUBGROUP:
588         // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
589         css << "   uvec4 ballot = subgroupBallot(true);\n"
590                "   if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
591 
592         switch (m_data.stage)
593         {
594         default:
595             DE_ASSERT(0); // fall through
596         case STAGE_COMPUTE:
597             css << "   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
598                    "   ivec2 partnerLocalId    = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
599                    "   uint sharedCoord        = localId.y * DIM + localId.x;\n"
600                    "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
601                    "   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + "
602                    "gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
603                    "   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + "
604                    "gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
605                    "   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
606                    "   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
607             break;
608         case STAGE_VERTEX:
609             css << "   uint bufferCoord        = gl_VertexIndex;\n"
610                    "   uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
611                    "   ivec2 imageCoord        = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / "
612                    "(DIM*NUM_WORKGROUP_EACH_DIM));\n"
613                    "   ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
614                    "   gl_PointSize            = 1.0f;\n"
615                    "   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
616             break;
617         case STAGE_FRAGMENT:
618             css << "   ivec2 localId        = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
619                    "   ivec2 groupId        = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
620                    "   ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
621                    "   ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
622                    "   uint sharedCoord     = localId.y * DIM + localId.x;\n"
623                    "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
624                    "   uint bufferCoord     = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
625                    "   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + "
626                    "partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
627                    "   ivec2 imageCoord     = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
628                    "   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
629             break;
630         }
631         break;
632     case SCOPE_WORKGROUP:
633         css << "   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
634                "   ivec2 partnerLocalId    = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
635                "   uint sharedCoord        = localId.y * DIM + localId.x;\n"
636                "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
637                "   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + "
638                "sharedCoord;\n"
639                "   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + "
640                "partnerSharedCoord;\n"
641                "   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
642                "   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
643         break;
644     case SCOPE_QUEUEFAMILY:
645     case SCOPE_DEVICE:
646         switch (m_data.stage)
647         {
648         default:
649             DE_ASSERT(0); // fall through
650         case STAGE_COMPUTE:
651             css << "   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
652                    "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - "
653                    "ivec2(gl_GlobalInvocationID.xy);\n"
654                    "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
655                    "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
656                    "   ivec2 imageCoord        = globalId;\n"
657                    "   ivec2 partnerImageCoord = partnerGlobalId;\n";
658             break;
659         case STAGE_VERTEX:
660             css << "   ivec2 globalId          = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / "
661                    "(DIM*NUM_WORKGROUP_EACH_DIM));\n"
662                    "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
663                    "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
664                    "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
665                    "   ivec2 imageCoord        = globalId;\n"
666                    "   ivec2 partnerImageCoord = partnerGlobalId;\n"
667                    "   gl_PointSize            = 1.0f;\n"
668                    "   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
669             break;
670         case STAGE_FRAGMENT:
671             css << "   ivec2 localId       = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
672                    "   ivec2 groupId       = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
673                    "   ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
674                    "   ivec2 partnerGroupId = groupId;\n"
675                    "   uint sharedCoord    = localId.y * DIM + localId.x;\n"
676                    "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
677                    "   uint bufferCoord    = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
678                    "   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + "
679                    "partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
680                    "   ivec2 imageCoord    = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
681                    "   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
682             break;
683         }
684         break;
685     }
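
    // Pairing example for SCOPE_WORKGROUP with DIM == 4: the invocation with
    // localId == (1,2) (sharedCoord 9) is paired with partnerLocalId == (2,1)
    // (partnerSharedCoord 6); each invocation writes its own payload element
    // and later checks the element written by its partner.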
686 
687     // Initialize shared memory, followed by a barrier
688     if (m_data.payloadSC == SC_WORKGROUP)
689     {
690         css << "   payload.x[sharedCoord] = 0;\n";
691     }
692     if (m_data.guardSC == SC_WORKGROUP)
693     {
694         css << "   guard.x[sharedCoord] = 0;\n";
695     }
696     if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
697     {
698         switch (invocationMapping)
699         {
700         default:
701             DE_ASSERT(0); // fall through
702         case SCOPE_SUBGROUP:
703             css << "   subgroupBarrier();\n";
704             break;
705         case SCOPE_WORKGROUP:
706             css << "   barrier();\n";
707             break;
708         }
709     }
710 
711     if (m_data.testType == TT_MP)
712     {
713         if (intType)
714         {
715             // Store payload
716             switch (m_data.payloadSC)
717             {
718             default:
719                 DE_ASSERT(0);   // fall through
720             case SC_PHYSBUFFER: // fall through
721             case SC_BUFFER:
722                 css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n";
723                 break;
724             case SC_IMAGE:
725                 css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, "
726                        "partnerImageCoord).x>>31), 0, 0, 0));\n";
727                 break;
728             case SC_WORKGROUP:
729                 css << "   payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n";
730                 break;
731             }
732         }
733         else
734         {
735             // Store payload
736             switch (m_data.payloadSC)
737             {
738             default:
739                 DE_ASSERT(0);   // fall through
740             case SC_PHYSBUFFER: // fall through
741             case SC_BUFFER:
742                 css << "   payload.x[bufferCoord] = " << typeStr
743                     << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n";
744                 break;
745             case SC_IMAGE:
746                 css << "   imageStore(payload, imageCoord, vec4(" << typeStr
747                     << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, "
748                        "0)); \n";
749                 break;
750             case SC_WORKGROUP:
751                 css << "   payload.x[sharedCoord] = " << typeStr
752                     << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n";
753                 break;
754             }
755         }
756     }
757     else
758     {
759         DE_ASSERT(m_data.testType == TT_WAR);
760         // Load payload
761         switch (m_data.payloadSC)
762         {
763         default:
764             DE_ASSERT(0);   // fall through
765         case SC_PHYSBUFFER: // fall through
766         case SC_BUFFER:
767             css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n";
768             break;
769         case SC_IMAGE:
770             css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n";
771             break;
772         case SC_WORKGROUP:
773             css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n";
774             break;
775         }
776     }
777     if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
778     {
779         // Acquire and release separate from control barrier
780         css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", "
781             << semanticsRelease.str()
782             << ");\n"
783                "   controlBarrier("
784             << scopeStr
785             << ", gl_ScopeInvocation, 0, 0);\n"
786                "   memoryBarrier("
787             << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
788     }
789     else if (m_data.syncType == ST_CONTROL_BARRIER)
790     {
791         // Control barrier performs both acquire and release
792         css << "   controlBarrier(" << scopeStr << ", " << scopeStr << ", " << storageSemanticsRelease.str() << " | "
793             << storageSemanticsAcquire.str() << ", " << semanticsAcquireRelease.str() << ");\n";
794     }
795     else
796     {
797         // Don't type cast for 64 bit image atomics
798         const char *typeCastStr =
799             (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
800         // Release barrier
801         std::stringstream atomicReleaseSemantics;
802         if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
803         {
804             css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", "
805                 << semanticsRelease.str() << ");\n";
806             atomicReleaseSemantics << ", 0, 0";
807         }
808         else
809         {
810             atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
811         }
812         // Atomic store guard
813         if (m_data.atomicRMW)
814         {
815             switch (m_data.guardSC)
816             {
817             default:
818                 DE_ASSERT(0);   // fall through
819             case SC_PHYSBUFFER: // fall through
820             case SC_BUFFER:
821                 css << "   atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr
822                     << atomicReleaseSemantics.str() << ");\n";
823                 break;
824             case SC_IMAGE:
825                 css << "   imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr
826                     << atomicReleaseSemantics.str() << ");\n";
827                 break;
828             case SC_WORKGROUP:
829                 css << "   atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr
830                     << atomicReleaseSemantics.str() << ");\n";
831                 break;
832             }
833         }
834         else
835         {
836             switch (m_data.guardSC)
837             {
838             default:
839                 DE_ASSERT(0);   // fall through
840             case SC_PHYSBUFFER: // fall through
841             case SC_BUFFER:
842                 css << "   atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr
843                     << atomicReleaseSemantics.str() << ");\n";
844                 break;
845             case SC_IMAGE:
846                 css << "   imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr
847                     << atomicReleaseSemantics.str() << ");\n";
848                 break;
849             case SC_WORKGROUP:
850                 css << "   atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr
851                     << atomicReleaseSemantics.str() << ");\n";
852                 break;
853             }
854         }
855 
856         std::stringstream atomicAcquireSemantics;
857         if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
858         {
859             atomicAcquireSemantics << ", 0, 0";
860         }
861         else
862         {
863             atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
864         }
865         // Atomic load guard
866         if (m_data.atomicRMW)
867         {
868             switch (m_data.guardSC)
869             {
870             default:
871                 DE_ASSERT(0);   // fall through
872             case SC_PHYSBUFFER: // fall through
873             case SC_BUFFER:
874                 css << "   skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr
875                     << atomicAcquireSemantics.str() << ") == 0;\n";
876                 break;
877             case SC_IMAGE:
878                 css << "   skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr
879                     << atomicAcquireSemantics.str() << ") == 0;\n";
880                 break;
881             case SC_WORKGROUP:
882                 css << "   skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr
883                     << atomicAcquireSemantics.str() << ") == 0;\n";
884                 break;
885             }
886         }
887         else
888         {
889             switch (m_data.guardSC)
890             {
891             default:
892                 DE_ASSERT(0);   // fall through
893             case SC_PHYSBUFFER: // fall through
894             case SC_BUFFER:
895                 css << "   skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str()
896                     << ") == 0;\n";
897                 break;
898             case SC_IMAGE:
899                 css << "   skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr
900                     << atomicAcquireSemantics.str() << ") == 0;\n";
901                 break;
902             case SC_WORKGROUP:
903                 css << "   skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str()
904                     << ") == 0;\n";
905                 break;
906             }
907         }
908         // Acquire barrier
909         if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
910         {
911             css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", "
912                 << semanticsAcquire.str() << ");\n";
913         }
914     }
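
    // For illustration, the ST_FENCE_ATOMIC path above, for a coherent
    // device-scope TT_MP case with a buffer payload, an image guard and uint
    // data (atomicRMW == false), emits GLSL along these lines:
    //   memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsImage, gl_SemanticsRelease);
    //   imageAtomicStore(guard, imageCoord, uint(1u), gl_ScopeDevice, 0, 0);
    //   skip = imageAtomicLoad(guard, partnerImageCoord, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsAcquire) == 0;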
915     if (m_data.testType == TT_MP)
916     {
917         // Load payload
918         switch (m_data.payloadSC)
919         {
920         default:
921             DE_ASSERT(0);   // fall through
922         case SC_PHYSBUFFER: // fall through
923         case SC_BUFFER:
924             css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n";
925             break;
926         case SC_IMAGE:
927             css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n";
928             break;
929         case SC_WORKGROUP:
930             css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n";
931             break;
932         }
933         css << "   if (!skip && r != " << typeStr
934             << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
935                "}\n";
936     }
937     else
938     {
939         DE_ASSERT(m_data.testType == TT_WAR);
940         // Store payload, only if the partner invocation has already done its read
941         css << "   if (!skip) {\n   ";
942         switch (m_data.payloadSC)
943         {
944         default:
945             DE_ASSERT(0);   // fall through
946         case SC_PHYSBUFFER: // fall through
947         case SC_BUFFER:
948             css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n";
949             break;
950         case SC_IMAGE:
951             if (intType)
952             {
953                 css << "   imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n";
954             }
955             else
956             {
957                 css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n";
958             }
959             break;
960         case SC_WORKGROUP:
961             css << "   payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n";
962             break;
963         }
964         css << "   }\n"
965                "   if (r != 0) { fail.x[bufferCoord] = 1; }\n"
966                "}\n";
967     }
968 
969     // Draw a fullscreen triangle strip based on gl_VertexIndex
970     std::stringstream vss;
971     vss << "#version 450 core\n"
972            "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
973            "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
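
    // The four gl_VertexIndex values select the corners of NDC space, so the
    // 4-vertex triangle strip covers the full viewport and the fragment-shader
    // variant of the test runs once per pixel.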
974 
975     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
976 
977     switch (m_data.stage)
978     {
979     default:
980         DE_ASSERT(0); // fall through
981     case STAGE_COMPUTE:
982         programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
983         break;
984     case STAGE_VERTEX:
985         programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
986         break;
987     case STAGE_FRAGMENT:
988         programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
989         programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
990         break;
991     }
992 }
993 
994 void MemoryModelTestCase::initProgramsTransitive(SourceCollections &programCollection) const
995 {
996     Scope invocationMapping = m_data.scope;
997 
998     const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64)  ? "uint64_t" :
999                           (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
1000                           (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" :
1001                                                                    "uint";
1002     const bool intType  = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
1003 
1004     // Construct storage-semantics strings for the payload and guard storage
1005     // classes. The atomic operations on the guard carry only the payload
1006     // storage class; storageSemanticsAll (payload | guard) is used by the
1007     // device-scope fences on the FENCE sides of the sync.
1008     std::stringstream storageSemanticsPayload;
1009     switch (m_data.payloadSC)
1010     {
1011     default:
1012         DE_ASSERT(0);   // fall through
1013     case SC_PHYSBUFFER: // fall through
1014     case SC_BUFFER:
1015         storageSemanticsPayload << "gl_StorageSemanticsBuffer";
1016         break;
1017     case SC_IMAGE:
1018         storageSemanticsPayload << "gl_StorageSemanticsImage";
1019         break;
1020     }
1021     std::stringstream storageSemanticsGuard;
1022     switch (m_data.guardSC)
1023     {
1024     default:
1025         DE_ASSERT(0);   // fall through
1026     case SC_PHYSBUFFER: // fall through
1027     case SC_BUFFER:
1028         storageSemanticsGuard << "gl_StorageSemanticsBuffer";
1029         break;
1030     case SC_IMAGE:
1031         storageSemanticsGuard << "gl_StorageSemanticsImage";
1032         break;
1033     }
1034     std::stringstream storageSemanticsAll;
1035     storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str();
1036 
1037     std::stringstream css;
1038     css << "#version 450 core\n";
1039     css << "#pragma use_vulkan_memory_model\n";
1040     if (!intType)
1041     {
1042         css << "#extension GL_EXT_shader_atomic_float : enable\n"
1043                "#extension GL_KHR_memory_scope_semantics : enable\n";
1044     }
1045     css << "#extension GL_KHR_shader_subgroup_basic : enable\n"
1046            "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
1047            "#extension GL_KHR_shader_subgroup_ballot : enable\n"
1048            "#extension GL_KHR_memory_scope_semantics : enable\n"
1049            "#extension GL_ARB_gpu_shader_int64 : enable\n"
1050            "#extension GL_EXT_buffer_reference : enable\n"
1051            "// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
1052            "layout(constant_id = 0) const int DIM = 1;\n"
1053            "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
1054            "shared bool sharedSkip;\n";
1055 
1056     css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
1057 
1058     const char *memqual  = "";
1059     const char *semAvail = "";
1060     const char *semVis   = "";
1061     if (m_data.coherent)
1062     {
1063         memqual = "workgroupcoherent";
1064     }
1065     else
1066     {
1067         memqual  = "nonprivate";
1068         semAvail = " | gl_SemanticsMakeAvailable";
1069         semVis   = " | gl_SemanticsMakeVisible";
1070     }
1071 
1072     stringstream pushConstMembers;
1073 
1074     // Declare payload, guard, and fail resources
1075     switch (m_data.payloadSC)
1076     {
1077     default:
1078         DE_ASSERT(0); // fall through
1079     case SC_PHYSBUFFER:
1080         css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
1081         pushConstMembers << "   layout(offset = 0) PayloadRef payloadref;\n";
1082         break;
1083     case SC_BUFFER:
1084         css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n";
1085         break;
1086     case SC_IMAGE:
1087         if (intType)
1088             css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
1089         else
1090             css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
1091         break;
1092     }
1093     // The guard variable is only accessed with atomics and need not be declared coherent.
1094     switch (m_data.guardSC)
1095     {
1096     default:
1097         DE_ASSERT(0); // fall through
1098     case SC_PHYSBUFFER:
1099         css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
1100         pushConstMembers << "layout(offset = 8) GuardRef guard;\n";
1101         break;
1102     case SC_BUFFER:
1103         css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n";
1104         break;
1105     case SC_IMAGE:
1106         if (intType)
1107             css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
1108         else
1109             css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
1110         break;
1111     }
1112 
1113     css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
1114 
1115     if (pushConstMembers.str().size() != 0)
1116     {
1117         css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
1118     }
1119 
1120     css << "void main()\n"
1121            "{\n"
1122            "   bool pass = true;\n"
1123            "   bool skip = false;\n"
1124            "   sharedSkip = false;\n";
1125 
1126     if (m_data.payloadSC == SC_PHYSBUFFER)
1127         css << "   " << memqual << " PayloadRef payload = payloadref;\n";
1128 
1129     // Compute coordinates based on the storage class and scope.
1130     switch (invocationMapping)
1131     {
1132     default:
1133         DE_ASSERT(0); // fall through
1134     case SCOPE_DEVICE:
1135         css << "   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
1136                "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
1137                "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
1138                "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
1139                "   ivec2 imageCoord        = globalId;\n"
1140                "   ivec2 partnerImageCoord = partnerGlobalId;\n"
1141                "   ivec2 globalId00          = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n"
1142                "   ivec2 partnerGlobalId00   = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - "
1143                "ivec2(gl_WorkGroupID.xy));\n"
1144                "   uint bufferCoord00        = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n"
1145                "   uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + "
1146                "partnerGlobalId00.x;\n"
1147                "   ivec2 imageCoord00        = globalId00;\n"
1148                "   ivec2 partnerImageCoord00 = partnerGlobalId00;\n";
1149         break;
1150     }
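
    // Example with DIM == 31 and NUM_WORKGROUP_EACH_DIM == 8: workgroup (1,2)
    // gets globalId00 == (31,62) and its partner workgroup (6,5) gets
    // partnerGlobalId00 == (186,155); the *Coord00 values are what invocation
    // (0,0) of each workgroup uses for the device-scope guard handshake.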
1151 
1152     // Store payload
1153     if (intType)
1154     {
1155         switch (m_data.payloadSC)
1156         {
1157         default:
1158             DE_ASSERT(0);   // fall through
1159         case SC_PHYSBUFFER: // fall through
1160         case SC_BUFFER:
1161             css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n";
1162             break;
1163         case SC_IMAGE:
1164             css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, "
1165                    "partnerImageCoord).x>>31), 0, 0, 0));\n";
1166             break;
1167         }
1168     }
1169     else
1170     {
1171         switch (m_data.payloadSC)
1172         {
1173         default:
1174             DE_ASSERT(0);   // fall through
1175         case SC_PHYSBUFFER: // fall through
1176         case SC_BUFFER:
1177             css << "   payload.x[bufferCoord] = " << typeStr
1178                 << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n";
1179             break;
1180         case SC_IMAGE:
1181             css << "   imageStore(payload, imageCoord, vec4(" << typeStr
1182                 << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x)>>31))), 0, 0, 0)); "
1183                    "\n";
1184             break;
1185         }
1186     }
1187 
1188     // Sync to other threads in the workgroup
1189     css << "   controlBarrier(gl_ScopeWorkgroup, "
1190            "gl_ScopeWorkgroup, "
1191         << storageSemanticsPayload.str()
1192         << " | gl_StorageSemanticsShared, "
1193            "gl_SemanticsAcquireRelease"
1194         << semAvail << ");\n";
1195 
1196     // Device-scope release/availability in invocation(0,0)
1197     css << "   if (all(equal(gl_LocalInvocationID.xy, ivec2(0,0)))) {\n";
1198     const char *typeCastStr =
1199         (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
1200     if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE)
1201     {
1202         switch (m_data.guardSC)
1203         {
1204         default:
1205             DE_ASSERT(0);   // fall through
1206         case SC_PHYSBUFFER: // fall through
1207         case SC_BUFFER:
1208             css << "       atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, "
1209                 << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1210             break;
1211         case SC_IMAGE:
1212             css << "       imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, "
1213                 << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1214             break;
1215         }
1216     }
1217     else
1218     {
1219         css << "       memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str()
1220             << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1221         switch (m_data.guardSC)
1222         {
1223         default:
1224             DE_ASSERT(0);   // fall through
1225         case SC_PHYSBUFFER: // fall through
1226         case SC_BUFFER:
1227             css << "       atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n";
1228             break;
1229         case SC_IMAGE:
1230             css << "       imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n";
1231             break;
1232         }
1233     }
1234 
1235     // Device-scope acquire/visibility either in invocation(0,0) or in every invocation
1236     if (!m_data.transitiveVis)
1237     {
1238         css << "   }\n";
1239     }
1240     if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC)
1241     {
1242         switch (m_data.guardSC)
1243         {
1244         default:
1245             DE_ASSERT(0);   // fall through
1246         case SC_PHYSBUFFER: // fall through
1247         case SC_BUFFER:
1248             css << "       skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, "
1249                 << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n";
1250             break;
1251         case SC_IMAGE:
1252             css << "       skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, "
1253                 << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n";
1254             break;
1255         }
1256     }
1257     else
1258     {
1259         switch (m_data.guardSC)
1260         {
1261         default:
1262             DE_ASSERT(0);   // fall through
1263         case SC_PHYSBUFFER: // fall through
1264         case SC_BUFFER:
1265             css << "       skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n";
1266             break;
1267         case SC_IMAGE:
1268             css << "       skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n";
1269             break;
1270         }
1271         css << "       memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str()
1272             << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n";
1273     }
1274 
1275     // If invocation(0,0) did the acquire then store "skip" to shared memory and
1276     // synchronize with the workgroup
1277     if (m_data.transitiveVis)
1278     {
1279         css << "       sharedSkip = skip;\n";
1280         css << "   }\n";
1281 
1282         css << "   controlBarrier(gl_ScopeWorkgroup, "
1283                "gl_ScopeWorkgroup, "
1284             << storageSemanticsPayload.str()
1285             << " | gl_StorageSemanticsShared, "
1286                "gl_SemanticsAcquireRelease"
1287             << semVis << ");\n";
1288         css << "   skip = sharedSkip;\n";
1289     }
1290 
1291     // Load payload
1292     switch (m_data.payloadSC)
1293     {
1294     default:
1295         DE_ASSERT(0);   // fall through
1296     case SC_PHYSBUFFER: // fall through
1297     case SC_BUFFER:
1298         css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n";
1299         break;
1300     case SC_IMAGE:
1301         css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n";
1302         break;
1303     }
1304     css << "   if (!skip && r != " << typeStr
1305         << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
1306            "}\n";
1307 
1308     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
1309 
1310     programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
1311 }
1312 
1313 TestInstance *MemoryModelTestCase::createInstance(Context &context) const
1314 {
1315     return new MemoryModelTestInstance(context, m_data);
1316 }
1317 
1318 tcu::TestStatus MemoryModelTestInstance::iterate(void)
1319 {
1320     const DeviceInterface &vk = m_context.getDeviceInterface();
1321     const VkDevice device     = m_context.getDevice();
1322     Allocator &allocator      = m_context.getDefaultAllocator();
1323 
1324     VkPhysicalDeviceProperties2 properties;
1325     properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1326     properties.pNext = NULL;
1327 
1328     m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
1329 
1330     uint32_t DIM                    = 31;
1331     uint32_t NUM_WORKGROUP_EACH_DIM = 8;
1332     // If necessary, shrink workgroup size to fit HW limits
1333     if (DIM * DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
1334     {
1335         DIM = (uint32_t)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
1336     }
1337     uint32_t NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
1338 
1339     VkDeviceSize bufferSizes[3];
1340     de::MovePtr<BufferWithMemory> buffers[3];
1341     vk::VkDescriptorBufferInfo bufferDescriptors[3];
1342     de::MovePtr<BufferWithMemory> copyBuffer;
1343 
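    // Buffers 0 and 1 back the payload and guard variables when those live in
    // (physical storage) buffer memory; buffer 2 always holds the per-invocation
    // "fail" flags that the host reads back at the end.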
1344     for (uint32_t i = 0; i < 3; ++i)
1345     {
1346         size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ?
1347                                  sizeof(uint64_t) :
1348                                  sizeof(uint32_t);
1349         // buffer2 is the "fail" buffer, and is always uint
1350         if (i == 2)
1351             elementSize = sizeof(uint32_t);
1352         bufferSizes[i] = NUM_INVOCATIONS * elementSize;
1353 
1354         vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1355                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1356 
1357         bool memoryDeviceAddress = false;
1358 
1359         bool local;
1360         switch (i)
1361         {
1362         default:
1363             DE_ASSERT(0); // fall through
1364         case 0:
1365             if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER)
1366                 continue;
1367             local = m_data.payloadMemLocal;
1368             if (m_data.payloadSC == SC_PHYSBUFFER)
1369             {
1370                 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1371                 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1372                     memoryDeviceAddress = true;
1373             }
1374             break;
1375         case 1:
1376             if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER)
1377                 continue;
1378             local = m_data.guardMemLocal;
1379             if (m_data.guardSC == SC_PHYSBUFFER)
1380             {
1381                 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1382                 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1383                     memoryDeviceAddress = true;
1384             }
1385             break;
1386         case 2:
1387             local = true;
1388             break;
1389         }
1390 
1391         try
1392         {
1393             buffers[i] = de::MovePtr<BufferWithMemory>(
1394                 new BufferWithMemory(vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags),
1395                                      (memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
1396                                          (local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)));
1397         }
1398         catch (const tcu::NotSupportedError &)
1399         {
1400             if (!local)
1401             {
1402                 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1403             }
1404             throw;
1405         }
1406         bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
1407     }
1408 
1409     // Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
1410     try
1411     {
1412         copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1413             vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT),
1414             MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1415     }
1416     catch (const tcu::NotSupportedError &)
1417     {
1418         copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1419             vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT),
1420             MemoryRequirement::HostVisible));
1421     }
1422 
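    // Storage images use a 32-bit texel format even for 64-bit data types,
    // since no 64-bit image formats are used by these tests.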
1423     VkFormat imageFormat;
1424     switch (m_data.dataType)
1425     {
1426     case DATA_TYPE_UINT:
1427     case DATA_TYPE_UINT64:
1428         imageFormat = VK_FORMAT_R32_UINT;
1429         break;
1430     case DATA_TYPE_FLOAT32:
1431     case DATA_TYPE_FLOAT64:
1432         imageFormat = VK_FORMAT_R32_SFLOAT;
1433         break;
1434     default:
1435         TCU_FAIL("Invalid data type.");
1436     }
1437 
1438     const VkImageCreateInfo imageCreateInfo = {
1439         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1440         DE_NULL,                             // const void* pNext;
1441         (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
1442         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
1443         imageFormat,                         // VkFormat format;
1444         {
1445             DIM * NUM_WORKGROUP_EACH_DIM, // uint32_t width;
1446             DIM * NUM_WORKGROUP_EACH_DIM, // uint32_t height;
1447             1u                            // uint32_t depth;
1448         },                                // VkExtent3D    extent;
1449         1u,                               // uint32_t  mipLevels;
1450         1u,                               // uint32_t  arrayLayers;
1451         VK_SAMPLE_COUNT_1_BIT,            // VkSampleCountFlagBits samples;
1452         VK_IMAGE_TILING_OPTIMAL,          // VkImageTiling tiling;
1453         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
1454             VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
1455         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1456         0u,                                  // uint32_t  queueFamilyIndexCount;
1457         DE_NULL,                             // const uint32_t*   pQueueFamilyIndices;
1458         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
1459     };
1460     VkImageViewCreateInfo imageViewCreateInfo = {
1461         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1462         DE_NULL,                                  // const void* pNext;
1463         (VkImageViewCreateFlags)0u,               // VkImageViewCreateFlags  flags;
1464         DE_NULL,                                  // VkImage image;
1465         VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
1466         imageFormat,                              // VkFormat format;
1467         {
1468             VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1469             VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1470             VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1471             VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
1472         },                          // VkComponentMapping  components;
1473         {
1474             VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1475             0u,                        // uint32_t   baseMipLevel;
1476             1u,                        // uint32_t   levelCount;
1477             0u,                        // uint32_t   baseArrayLayer;
1478             1u                         // uint32_t   layerCount;
1479         }                              // VkImageSubresourceRange subresourceRange;
1480     };
1481 
1482     de::MovePtr<ImageWithMemory> images[2];
1483     Move<VkImageView> imageViews[2];
1484     vk::VkDescriptorImageInfo imageDescriptors[2];
1485 
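    // Images 0 and 1 back the payload and guard variables when the corresponding
    // storage class is SC_IMAGE; other storage classes skip image creation.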
1486     for (uint32_t i = 0; i < 2; ++i)
1487     {
1488 
1489         bool local;
1490         switch (i)
1491         {
1492         default:
1493             DE_ASSERT(0); // fall through
1494         case 0:
1495             if (m_data.payloadSC != SC_IMAGE)
1496                 continue;
1497             local = m_data.payloadMemLocal;
1498             break;
1499         case 1:
1500             if (m_data.guardSC != SC_IMAGE)
1501                 continue;
1502             local = m_data.guardMemLocal;
1503             break;
1504         }
1505 
1506         try
1507         {
1508             images[i] = de::MovePtr<ImageWithMemory>(
1509                 new ImageWithMemory(vk, device, allocator, imageCreateInfo,
1510                                     local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
1511         }
1512         catch (const tcu::NotSupportedError &)
1513         {
1514             if (!local)
1515             {
1516                 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1517             }
1518             throw;
1519         }
1520         imageViewCreateInfo.image = **images[i];
1521         imageViews[i]             = createImageView(vk, device, &imageViewCreateInfo, NULL);
1522 
1523         imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
1524     }
1525 
1526     vk::DescriptorSetLayoutBuilder layoutBuilder;
1527 
1528     switch (m_data.payloadSC)
1529     {
1530     default:
1531     case SC_BUFFER:
1532         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1533         break;
1534     case SC_IMAGE:
1535         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages);
1536         break;
1537     }
1538     switch (m_data.guardSC)
1539     {
1540     default:
1541     case SC_BUFFER:
1542         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1543         break;
1544     case SC_IMAGE:
1545         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages);
1546         break;
1547     }
1548     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1549 
1550     vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1551 
1552     vk::Unique<vk::VkDescriptorPool> descriptorPool(
1553         vk::DescriptorPoolBuilder()
1554             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
1555             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
1556             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1557     vk::Unique<vk::VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1558 
1559     vk::DescriptorSetUpdateBuilder setUpdateBuilder;
1560     switch (m_data.payloadSC)
1561     {
1562     default:
1563         DE_ASSERT(0); // fall through
1564     case SC_PHYSBUFFER:
1565     case SC_WORKGROUP:
1566         break;
1567     case SC_BUFFER:
1568         setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1569                                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1570         break;
1571     case SC_IMAGE:
1572         setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1573                                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
1574         break;
1575     }
1576     switch (m_data.guardSC)
1577     {
1578     default:
1579         DE_ASSERT(0); // fall through
1580     case SC_PHYSBUFFER:
1581     case SC_WORKGROUP:
1582         break;
1583     case SC_BUFFER:
1584         setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1585                                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
1586         break;
1587     case SC_IMAGE:
1588         setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1589                                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
1590         break;
1591     }
1592     setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
1593                                  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
1594 
1595     setUpdateBuilder.update(vk, device);
1596 
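    // 16 bytes of push constants leave room for two 8-byte buffer device addresses:
    // the payload address at offset 0 and the guard address at offset 8, pushed
    // below when the corresponding storage class is SC_PHYSBUFFER.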
1597     const VkPushConstantRange pushConstRange = {
1598         allShaderStages, // VkShaderStageFlags    stageFlags
1599         0,               // uint32_t                offset
1600         16               // uint32_t                size
1601     };
1602 
1603     const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
1604         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
1605         DE_NULL,                                       // pNext
1606         (VkPipelineLayoutCreateFlags)0,
1607         1,                          // setLayoutCount
1608         &descriptorSetLayout.get(), // pSetLayouts
1609         1u,                         // pushConstantRangeCount
1610         &pushConstRange,            // pPushConstantRanges
1611     };
1612 
1613     Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1614 
1615     Move<VkPipeline> pipeline;
1616     Move<VkRenderPass> renderPass;
1617     Move<VkFramebuffer> framebuffer;
1618 
1619     VkPipelineBindPoint bindPoint =
1620         m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
1621 
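    // Specialization constants 0 and 1 pass the workgroup dimension (DIM) and the
    // number of workgroups per dimension to the shaders.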
1622     const uint32_t specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
1623 
1624     const vk::VkSpecializationMapEntry entries[2] = {
1625         {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
1626         {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
1627     };
1628 
1629     const vk::VkSpecializationInfo specInfo = {
1630         2,                // mapEntryCount
1631         entries,          // pMapEntries
1632         sizeof(specData), // dataSize
1633         specData          // pData
1634     };
1635 
1636     if (m_data.stage == STAGE_COMPUTE)
1637     {
1638         const Unique<VkShaderModule> shader(
1639             createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
1640 
1641         const VkPipelineShaderStageCreateInfo shaderCreateInfo = {
1642             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1643             DE_NULL,
1644             (VkPipelineShaderStageCreateFlags)0,
1645             VK_SHADER_STAGE_COMPUTE_BIT, // stage
1646             *shader,                     // shader
1647             "main",
1648             &specInfo, // pSpecializationInfo
1649         };
1650 
1651         const VkComputePipelineCreateInfo pipelineCreateInfo = {
1652             VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1653             DE_NULL,
1654             0u,                // flags
1655             shaderCreateInfo,  // cs
1656             *pipelineLayout,   // layout
1657             (vk::VkPipeline)0, // basePipelineHandle
1658             0u,                // basePipelineIndex
1659         };
1660         pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1661     }
1662     else
1663     {
1664 
1665         const vk::VkSubpassDescription subpassDesc = {
1666             (vk::VkSubpassDescriptionFlags)0,
1667             vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
1668             0u,                                  // inputCount
1669             DE_NULL,                             // pInputAttachments
1670             0u,                                  // colorCount
1671             DE_NULL,                             // pColorAttachments
1672             DE_NULL,                             // pResolveAttachments
1673             DE_NULL,                             // depthStencilAttachment
1674             0u,                                  // preserveCount
1675             DE_NULL,                             // pPreserveAttachments
1676 
1677         };
1678         const vk::VkRenderPassCreateInfo renderPassParams = {
1679             vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
1680             DE_NULL,                                       // pNext
1681             (vk::VkRenderPassCreateFlags)0,
1682             0u,           // attachmentCount
1683             DE_NULL,      // pAttachments
1684             1u,           // subpassCount
1685             &subpassDesc, // pSubpasses
1686             0u,           // dependencyCount
1687             DE_NULL,      // pDependencies
1688         };
1689 
1690         renderPass = createRenderPass(vk, device, &renderPassParams);
1691 
1692         const vk::VkFramebufferCreateInfo framebufferParams = {
1693             vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
1694             DE_NULL,                                       // pNext
1695             (vk::VkFramebufferCreateFlags)0,
1696             *renderPass,                  // renderPass
1697             0u,                           // attachmentCount
1698             DE_NULL,                      // pAttachments
1699             DIM * NUM_WORKGROUP_EACH_DIM, // width
1700             DIM * NUM_WORKGROUP_EACH_DIM, // height
1701             1u,                           // layers
1702         };
1703 
1704         framebuffer = createFramebuffer(vk, device, &framebufferParams);
1705 
1706         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
1707             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1708             DE_NULL,                                                   // const void* pNext;
1709             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
1710             0u,                                                        // uint32_t vertexBindingDescriptionCount;
1711             DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1712             0u,      // uint32_t vertexAttributeDescriptionCount;
1713             DE_NULL  // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1714         };
1715 
1716         const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
1717             VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
1718             DE_NULL,                                                     // const void* pNext;
1719             (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
1720             (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST :
1721                                              VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
1722             VK_FALSE                                                               // VkBool32 primitiveRestartEnable;
1723         };
1724 
1725         const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
1726             VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1727             DE_NULL,                                                    // const void* pNext;
1728             (VkPipelineRasterizationStateCreateFlags)0,          // VkPipelineRasterizationStateCreateFlags flags;
1729             VK_FALSE,                                            // VkBool32 depthClampEnable;
1730             (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
1731             VK_POLYGON_MODE_FILL,                                // VkPolygonMode polygonMode;
1732             VK_CULL_MODE_NONE,                                   // VkCullModeFlags cullMode;
1733             VK_FRONT_FACE_CLOCKWISE,                             // VkFrontFace frontFace;
1734             VK_FALSE,                                            // VkBool32 depthBiasEnable;
1735             0.0f,                                                // float depthBiasConstantFactor;
1736             0.0f,                                                // float depthBiasClamp;
1737             0.0f,                                                // float depthBiasSlopeFactor;
1738             1.0f                                                 // float lineWidth;
1739         };
1740 
1741         const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
1742             VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType                          sType
1743             DE_NULL,                                                  // const void*                              pNext
1744             0u,                                                       // VkPipelineMultisampleStateCreateFlags    flags
1745             VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits                    rasterizationSamples
1746             VK_FALSE,              // VkBool32                                 sampleShadingEnable
1747             1.0f,                  // float                                    minSampleShading
1748             DE_NULL,               // const VkSampleMask*                      pSampleMask
1749             VK_FALSE,              // VkBool32                                 alphaToCoverageEnable
1750             VK_FALSE               // VkBool32                                 alphaToOneEnable
1751         };
1752 
1753         VkViewport viewport = makeViewport(DIM * NUM_WORKGROUP_EACH_DIM, DIM * NUM_WORKGROUP_EACH_DIM);
1754         VkRect2D scissor    = makeRect2D(DIM * NUM_WORKGROUP_EACH_DIM, DIM * NUM_WORKGROUP_EACH_DIM);
1755 
1756         const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
1757             VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                             sType
1758             DE_NULL,                                               // const void*                                 pNext
1759             (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags          flags
1760             1u,        // uint32_t                                    viewportCount
1761             &viewport, // const VkViewport*                           pViewports
1762             1u,        // uint32_t                                    scissorCount
1763             &scissor   // const VkRect2D*                             pScissors
1764         };
1765 
1766         Move<VkShaderModule> fs;
1767         Move<VkShaderModule> vs;
1768 
1769         uint32_t numStages;
1770         if (m_data.stage == STAGE_VERTEX)
1771         {
1772             vs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1773             fs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1774             numStages = 1u;
1775         }
1776         else
1777         {
1778             vs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1779             fs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1780             numStages = 2u;
1781         }
1782 
1783         const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
1784             {
1785                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1786                 VK_SHADER_STAGE_VERTEX_BIT, // stage
1787                 *vs,                        // shader
1788                 "main",
1789                 &specInfo, // pSpecializationInfo
1790             },
1791             {
1792                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1793                 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
1794                 *fs,                          // shader
1795                 "main",
1796                 &specInfo, // pSpecializationInfo
1797             }};
1798 
1799         const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
1800             VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
1801             DE_NULL,                                         // const void* pNext;
1802             (VkPipelineCreateFlags)0,                        // VkPipelineCreateFlags flags;
1803             numStages,                                       // uint32_t stageCount;
1804             &shaderCreateInfo[0],                            // const VkPipelineShaderStageCreateInfo* pStages;
1805             &vertexInputStateCreateInfo,   // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
1806             &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
1807             DE_NULL,                       // const VkPipelineTessellationStateCreateInfo* pTessellationState;
1808             &viewportStateCreateInfo,      // const VkPipelineViewportStateCreateInfo* pViewportState;
1809             &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
1810             &multisampleStateCreateInfo,   // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
1811             DE_NULL,                       // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
1812             DE_NULL,                       // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
1813             DE_NULL,                       // const VkPipelineDynamicStateCreateInfo* pDynamicState;
1814             pipelineLayout.get(),          // VkPipelineLayout layout;
1815             renderPass.get(),              // VkRenderPass renderPass;
1816             0u,                            // uint32_t subpass;
1817             DE_NULL,                       // VkPipeline basePipelineHandle;
1818             0                              // int basePipelineIndex;
1819         };
1820 
1821         pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1822     }
1823 
1824     const VkQueue queue             = m_context.getUniversalQueue();
1825     Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1826                                                         m_context.getUniversalQueueFamilyIndex());
1827     Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1828 
1829     VkBufferDeviceAddressInfo addrInfo = {
1830         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
1831         DE_NULL,                                      // const void*  pNext;
1832         0,                                            // VkBuffer            buffer
1833     };
1834 
1835     VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1836     VkClearValue clearColor       = makeClearValueColorU32(0, 0, 0, 0);
1837 
1838     VkMemoryBarrier memBarrier = {
1839         VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1840         DE_NULL,                          // pNext
1841         0u,                               // srcAccessMask
1842         0u,                               // dstAccessMask
1843     };
1844 
1845     const VkBufferCopy copyParams = {
1846         (VkDeviceSize)0u, // srcOffset
1847         (VkDeviceSize)0u, // dstOffset
1848         bufferSizes[2]    // size
1849     };
1850 
1851     uint32_t NUM_SUBMITS = 4;
1852 
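    // Record and submit the work several times, re-initializing the payload and
    // guard between dispatches/draws, so that a memory-model violation has many
    // chances to show up as a nonzero value in the fail buffer.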
1853     for (uint32_t x = 0; x < NUM_SUBMITS; ++x)
1854     {
1855         beginCommandBuffer(vk, *cmdBuffer, 0u);
1856 
1857         if (x == 0)
1858             vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1859 
1860         for (uint32_t i = 0; i < 2; ++i)
1861         {
1862             if (!images[i])
1863                 continue;
1864 
1865             const VkImageMemoryBarrier imageBarrier = {
1866                 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType        sType
1867                 DE_NULL,                                // const void*            pNext
1868                 0u,                                     // VkAccessFlags        srcAccessMask
1869                 VK_ACCESS_TRANSFER_WRITE_BIT,           // VkAccessFlags        dstAccessMask
1870                 VK_IMAGE_LAYOUT_UNDEFINED,              // VkImageLayout        oldLayout
1871                 VK_IMAGE_LAYOUT_GENERAL,                // VkImageLayout        newLayout
1872                 VK_QUEUE_FAMILY_IGNORED,                // uint32_t                srcQueueFamilyIndex
1873                 VK_QUEUE_FAMILY_IGNORED,                // uint32_t                dstQueueFamilyIndex
1874                 **images[i],                            // VkImage                image
1875                 {
1876                     VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags    aspectMask
1877                     0u,                        // uint32_t                baseMipLevel
1878                     1u,                        // uint32_t                levelCount
1879                     0u,                        // uint32_t                baseArrayLayer
1880                     1u,                        // uint32_t                layerCount
1881                 }};
1882 
1883             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1884                                   (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 0,
1885                                   (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrier);
1886         }
1887 
1888         vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1889         vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1890 
1891         if (m_data.payloadSC == SC_PHYSBUFFER)
1892         {
1893             addrInfo.buffer      = **buffers[0];
1894             VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
1895             vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, 0, sizeof(VkDeviceSize), &addr);
1896         }
1897         if (m_data.guardSC == SC_PHYSBUFFER)
1898         {
1899             addrInfo.buffer      = **buffers[1];
1900             VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
1901             vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, 8, sizeof(VkDeviceSize), &addr);
1902         }
1903 
1904         for (uint32_t iters = 0; iters < 50; ++iters)
1905         {
1906             for (uint32_t i = 0; i < 2; ++i)
1907             {
1908                 if (buffers[i])
1909                     vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1910                 if (images[i])
1911                     vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1,
1912                                           &range);
1913             }
1914 
1915             memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1916             memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1917             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, 0, 1, &memBarrier, 0,
1918                                   DE_NULL, 0, DE_NULL);
1919 
1920             if (m_data.stage == STAGE_COMPUTE)
1921             {
1922                 vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1923             }
1924             else
1925             {
1926                 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1927                                 makeRect2D(DIM * NUM_WORKGROUP_EACH_DIM, DIM * NUM_WORKGROUP_EACH_DIM), 0, DE_NULL,
1928                                 VK_SUBPASS_CONTENTS_INLINE);
1929                 // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1930                 if (m_data.stage == STAGE_VERTEX)
1931                 {
1932                     vk.cmdDraw(*cmdBuffer, DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1933                 }
1934                 else
1935                 {
1936                     vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1937                 }
1938                 endRenderPass(vk, *cmdBuffer);
1939             }
1940 
1941             memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1942             memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1943             vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &memBarrier, 0,
1944                                   DE_NULL, 0, DE_NULL);
1945         }
1946 
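        // On the last submit, copy the fail buffer into the host-visible copy buffer
        // and make the transfer write visible to host reads.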
1947         if (x == NUM_SUBMITS - 1)
1948         {
1949             vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1950             memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1951             memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
1952             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1,
1953                                   &memBarrier, 0, DE_NULL, 0, DE_NULL);
1954         }
1955 
1956         endCommandBuffer(vk, *cmdBuffer);
1957 
1958         submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1959 
1960         m_context.resetCommandPoolForVKSC(device, *cmdPool);
1961     }
1962 
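    // Read back the fail buffer; any nonzero entry means that invocation observed
    // an unexpected payload value.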
1963     tcu::TestLog &log = m_context.getTestContext().getLog();
1964 
1965     uint32_t *ptr = (uint32_t *)copyBuffer->getAllocation().getHostPtr();
1966     invalidateAlloc(vk, device, copyBuffer->getAllocation());
1967     qpTestResult res = QP_TEST_RESULT_PASS;
1968 
1969     uint32_t numErrors = 0;
1970     for (uint32_t i = 0; i < NUM_INVOCATIONS; ++i)
1971     {
1972         if (ptr[i] != 0)
1973         {
1974             if (numErrors < 256)
1975             {
1976                 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1977             }
1978             numErrors++;
1979             res = QP_TEST_RESULT_FAIL;
1980         }
1981     }
1982 
1983     if (numErrors)
1984     {
1985         log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1986     }
1987 
1988     return tcu::TestStatus(res, qpGetTestResultName(res));
1989 }
1990 
1991 #ifndef CTS_USES_VULKANSC
1992 void checkPermutedIndexTestSupport(Context &context, std::string testName)
1993 {
1994     DE_UNREF(testName);
1995 
1996     const auto maxComputeWorkGroupCount       = context.getDeviceProperties().limits.maxComputeWorkGroupCount;
1997     const auto maxComputeWorkGroupSize        = context.getDeviceProperties().limits.maxComputeWorkGroupSize;
1998     const auto maxComputeWorkGroupInvocations = context.getDeviceProperties().limits.maxComputeWorkGroupInvocations;
1999 
2000     if (maxComputeWorkGroupCount[0] < 256u)
2001         TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupCount.x");
2002 
2003     if (maxComputeWorkGroupSize[0] < 256u)
2004         TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupSize.x");
2005 
2006     if (maxComputeWorkGroupInvocations < 256u)
2007         TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupInvocations");
2008 }
2009 
2010 tcu::TestCaseGroup *createPermutedIndexTests(tcu::TestContext &testCtx)
2011 {
2012     de::MovePtr<tcu::TestCaseGroup> permutedIndex(new tcu::TestCaseGroup(testCtx, "permuted_index"));
2013     static const char dataDir[]      = "memory_model/message_passing/permuted_index";
2014     static const std::string cases[] = {"barrier", "release_acquire", "release_acquire_atomic_payload"};
2015 
2016     for (const auto &test : cases)
2017     {
2018         cts_amber::AmberTestCase *testCase =
2019             cts_amber::createAmberTestCase(testCtx, test.c_str(), dataDir, (test + ".amber").c_str());
2020         testCase->setCheckSupportCallback(checkPermutedIndexTestSupport);
2021 
2022         permutedIndex->addChild(testCase);
2023     }
2024 
2025     return permutedIndex.release();
2026 }
2027 #endif // CTS_USES_VULKANSC
2028 
2029 } // namespace
2030 
2031 tcu::TestCaseGroup *createTests(tcu::TestContext &testCtx, const std::string &name)
2032 {
2033     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str()));
2034 
2035     typedef struct
2036     {
2037         uint32_t value;
2038         const char *name;
2039     } TestGroupCase;
2040 
2041     TestGroupCase ttCases[] = {
2042         {TT_MP, "message_passing"},
2043         {TT_WAR, "write_after_read"},
2044     };
2045 
2046     TestGroupCase core11Cases[] = {
2047         // Supported by Vulkan 1.1
2048         {1, "core11"},
2049         // Requires VK_KHR_vulkan_memory_model extension
2050         {0, "ext"},
2051     };
2052 
2053     TestGroupCase dtCases[] = {
2054         // uint32_t atomics
2055         {DATA_TYPE_UINT, "u32"},
2056         // uint64_t atomics
2057         {DATA_TYPE_UINT64, "u64"},
2058         // float32 atomics
2059         {DATA_TYPE_FLOAT32, "f32"},
2060         // float64 atomics
2061         {DATA_TYPE_FLOAT64, "f64"},
2062     };
2063 
2064     TestGroupCase cohCases[] = {
2065         // coherent payload variable
2066         {1, "coherent"},
2067         // noncoherent payload variable
2068         {0, "noncoherent"},
2069     };
2070 
2071     TestGroupCase stCases[] = {
2072         // release fence, acquire fence
2073         {ST_FENCE_FENCE, "fence_fence"},
2074         // release fence, atomic acquire
2075         {ST_FENCE_ATOMIC, "fence_atomic"},
2076         // atomic release, acquire fence
2077         {ST_ATOMIC_FENCE, "atomic_fence"},
2078         // atomic release, atomic acquire
2079         {ST_ATOMIC_ATOMIC, "atomic_atomic"},
2080         // control barrier
2081         {ST_CONTROL_BARRIER, "control_barrier"},
2082         // control barrier with release/acquire
2083         {ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier"},
2084     };
2085 
2086     TestGroupCase rmwCases[] = {
2087         {0, "atomicwrite"},
2088         {1, "atomicrmw"},
2089     };
2090 
2091     TestGroupCase scopeCases[] = {
2092         {SCOPE_DEVICE, "device"},
2093         {SCOPE_QUEUEFAMILY, "queuefamily"},
2094         {SCOPE_WORKGROUP, "workgroup"},
2095         {SCOPE_SUBGROUP, "subgroup"},
2096     };
2097 
2098     TestGroupCase plCases[] = {
2099         // payload variable in non-local memory
2100         {0, "payload_nonlocal"},
2101         // payload variable in local memory
2102         {1, "payload_local"},
2103     };
2104 
2105     TestGroupCase pscCases[] = {
2106         // payload variable in buffer memory
2107         {SC_BUFFER, "buffer"},
2108         // payload variable in image memory
2109         {SC_IMAGE, "image"},
2110         // payload variable in workgroup memory
2111         {SC_WORKGROUP, "workgroup"},
2112         // payload variable in physical storage buffer memory
2113         {SC_PHYSBUFFER, "physbuffer"},
2114     };
2115 
2116     TestGroupCase glCases[] = {
2117         // guard variable in non-local memory
2118         {0, "guard_nonlocal"},
2119         // guard variable in local memory
2120         {1, "guard_local"},
2121     };
2122 
2123     TestGroupCase gscCases[] = {
2124         // guard variable in buffer memory
2125         {SC_BUFFER, "buffer"},
2126         // guard variable in image memory
2127         {SC_IMAGE, "image"},
2128         // guard variable in workgroup memory
2129         {SC_WORKGROUP, "workgroup"},
2130         // guard variable in physical storage buffer memory
2131         {SC_PHYSBUFFER, "physbuffer"},
2132     };
2133 
2134     TestGroupCase stageCases[] = {
2135         {STAGE_COMPUTE, "comp"},
2136         {STAGE_VERTEX, "vert"},
2137         {STAGE_FRAGMENT, "frag"},
2138     };
2139 
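    // Build the test hierarchy: test type / core11|ext / data type / coherency /
    // sync type / atomic write vs RMW / scope / payload locality / payload storage
    // class / guard locality / guard storage class / stage, filtering out invalid
    // or redundant combinations below.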
2140     for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
2141     {
2142         de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name));
2143 
2144 #ifndef CTS_USES_VULKANSC
2145         // Permuted index tests for message passing.
2146         if (ttCases[ttNdx].value == TT_MP)
2147             ttGroup->addChild(createPermutedIndexTests(testCtx));
2148 #endif // CTS_USES_VULKANSC
2149 
2150         for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
2151         {
2152             de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name));
2153             for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
2154             {
2155                 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name));
2156                 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2157                 {
2158                     de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
2159                     for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2160                     {
2161                         de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
2162                         for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
2163                         {
2164                             de::MovePtr<tcu::TestCaseGroup> rmwGroup(
2165                                 new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name));
2166                             for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
2167                             {
2168                                 de::MovePtr<tcu::TestCaseGroup> scopeGroup(
2169                                     new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name));
2170                                 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2171                                 {
2172                                     de::MovePtr<tcu::TestCaseGroup> plGroup(
2173                                         new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
2174                                     for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2175                                     {
2176                                         de::MovePtr<tcu::TestCaseGroup> pscGroup(
2177                                             new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
2178                                         for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2179                                         {
2180                                             de::MovePtr<tcu::TestCaseGroup> glGroup(
2181                                                 new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
2182                                             for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2183                                             {
2184                                                 de::MovePtr<tcu::TestCaseGroup> gscGroup(
2185                                                     new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
2186                                                 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases);
2187                                                      stageNdx++)
2188                                                 {
2189                                                     CaseDef c = {
2190                                                         !!plCases[plNdx].value,               // bool payloadMemLocal;
2191                                                         !!glCases[glNdx].value,               // bool guardMemLocal;
2192                                                         !!cohCases[cohNdx].value,             // bool coherent;
2193                                                         !!core11Cases[core11Ndx].value,       // bool core11;
2194                                                         !!rmwCases[rmwNdx].value,             // bool atomicRMW;
2195                                                         (TestType)ttCases[ttNdx].value,       // TestType testType;
2196                                                         (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
2197                                                         (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
2198                                                         (Scope)scopeCases[scopeNdx].value,    // Scope scope;
2199                                                         (SyncType)stCases[stNdx].value,       // SyncType syncType;
2200                                                         (Stage)stageCases[stageNdx].value,    // Stage stage;
2201                                                         (DataType)dtCases[dtNdx].value,       // DataType dataType;
2202                                                         false,                                // bool transitive;
2203                                                         false,                                // bool transitiveVis;
2204                                                     };
2205 
2206                                                     // Mustpass11 tests should only exercise things we expect to work on
2207                                                     // existing implementations. Exclude noncoherent tests which require
2208                                                     // new extensions, and assume atomic synchronization wouldn't work
2209                                                     // (i.e. atomics may be implemented as relaxed atomics). Exclude
2210                                                     // queuefamily scope which doesn't exist in Vulkan 1.1. Exclude
2211                                                     // physical storage buffer which doesn't support the legacy decorations.
2212                                                     if (c.core11 &&
2213                                                         (c.coherent == 0 || c.syncType == ST_FENCE_ATOMIC ||
2214                                                          c.syncType == ST_ATOMIC_FENCE ||
2215                                                          c.syncType == ST_ATOMIC_ATOMIC ||
2216                                                          c.dataType == DATA_TYPE_UINT64 ||
2217                                                          c.dataType == DATA_TYPE_FLOAT64 ||
2218                                                          c.scope == SCOPE_QUEUEFAMILY || c.payloadSC == SC_PHYSBUFFER ||
2219                                                          c.guardSC == SC_PHYSBUFFER))
2220                                                     {
2221                                                         continue;
2222                                                     }
2223 
2224                                                     if (c.stage != STAGE_COMPUTE && c.scope == SCOPE_WORKGROUP)
2225                                                     {
2226                                                         continue;
2227                                                     }
2228 
2229                                                     // Don't exercise local and non-local for workgroup memory
2230                                                     // Also don't exercise workgroup memory for non-compute stages
2231                                                     if (c.payloadSC == SC_WORKGROUP &&
2232                                                         (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
2233                                                     {
2234                                                         continue;
2235                                                     }
2236                                                     if (c.guardSC == SC_WORKGROUP &&
2237                                                         (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
2238                                                     {
2239                                                         continue;
2240                                                     }
2241                                                     // Can't do control barrier with larger than workgroup scope, or non-compute stages
2242                                                     if ((c.syncType == ST_CONTROL_BARRIER ||
2243                                                          c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
2244                                                         (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY ||
2245                                                          c.stage != STAGE_COMPUTE))
2246                                                     {
2247                                                         continue;
2248                                                     }
2249 
2250                                                     // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
2251                                                     if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
2252                                                     {
2253                                                         continue;
2254                                                     }
2255 
2256                                                     // uint64/float32/float64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
2257                                                     const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 ||
2258                                                                                 c.dataType == DATA_TYPE_FLOAT32 ||
2259                                                                                 c.dataType == DATA_TYPE_FLOAT64);
2260                                                     if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC)
2261                                                     {
2262                                                         continue;
2263                                                     }
2264 
2265                                                     // No 64-bit image types, so skip tests with both payload and guard in image memory
2266                                                     if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE &&
2267                                                         c.guardSC == SC_IMAGE)
2268                                                     {
2269                                                         continue;
2270                                                     }
2271 
2272                                                     // No support for atomic operations on 64-bit floating point images
2273                                                     if (c.dataType == DATA_TYPE_FLOAT64 &&
2274                                                         (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE))
2275                                                     {
2276                                                         continue;
2277                                                     }
2278                                                     // Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
2279                                                     if ((c.syncType == ST_CONTROL_BARRIER ||
2280                                                          c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
2281                                                         (c.guardSC != 0 || c.guardMemLocal != 0))
2282                                                     {
2283                                                         continue;
2284                                                     }
2285 
2286                                                     gscGroup->addChild(
2287                                                         new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, c));
2288                                                 }
2289                                                 glGroup->addChild(gscGroup.release());
2290                                             }
2291                                             pscGroup->addChild(glGroup.release());
2292                                         }
2293                                         plGroup->addChild(pscGroup.release());
2294                                     }
2295                                     scopeGroup->addChild(plGroup.release());
2296                                 }
2297                                 rmwGroup->addChild(scopeGroup.release());
2298                             }
2299                             stGroup->addChild(rmwGroup.release());
2300                         }
2301                         cohGroup->addChild(stGroup.release());
2302                     }
2303                     dtGroup->addChild(cohGroup.release());
2304                 }
2305                 core11Group->addChild(dtGroup.release());
2306             }
2307             ttGroup->addChild(core11Group.release());
2308         }
2309         group->addChild(ttGroup.release());
2310     }
2311 
2312     TestGroupCase transVisCases[] = {
2313         // destination invocation acquires
2314         {0, "nontransvis"},
2315         // invocation 0,0 acquires
2316         {1, "transvis"},
2317     };
2318 
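    // Transitive tests fix the test type to message passing, the scope to device
    // and the stage to compute, varying whether visibility is propagated through
    // invocation (0,0) ("transvis") or acquired directly by the destination
    // invocation ("nontransvis").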
2319     de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive"));
2320     for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2321     {
2322         de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
2323         for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2324         {
2325             de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
2326             for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2327             {
2328                 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
2329                 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2330                 {
2331                     de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
2332                     for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2333                     {
2334                         de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
2335                         for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2336                         {
2337                             de::MovePtr<tcu::TestCaseGroup> gscGroup(
2338                                 new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
2339                             for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++)
2340                             {
2341                                 CaseDef c = {
2342                                     !!plCases[plNdx].value,               // bool payloadMemLocal;
2343                                     !!glCases[glNdx].value,               // bool guardMemLocal;
2344                                     !!cohCases[cohNdx].value,             // bool coherent;
2345                                     false,                                // bool core11;
2346                                     false,                                // bool atomicRMW;
2347                                     TT_MP,                                // TestType testType;
2348                                     (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
2349                                     (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
2350                                     SCOPE_DEVICE,                         // Scope scope;
2351                                     (SyncType)stCases[stNdx].value,       // SyncType syncType;
2352                                     STAGE_COMPUTE,                        // Stage stage;
2353                                     DATA_TYPE_UINT,                       // DataType dataType;
2354                                     true,                                 // bool transitive;
2355                                     !!transVisCases[visNdx].value,        // bool transitiveVis;
2356                                 };
2357                                 if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP)
2358                                 {
2359                                     continue;
2360                                 }
2361                                 if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
2362                                 {
2363                                     continue;
2364                                 }
2365                                 gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, c));
2366                             }
2367                             glGroup->addChild(gscGroup.release());
2368                         }
2369                         pscGroup->addChild(glGroup.release());
2370                     }
2371                     plGroup->addChild(pscGroup.release());
2372                 }
2373                 stGroup->addChild(plGroup.release());
2374             }
2375             cohGroup->addChild(stGroup.release());
2376         }
2377         transGroup->addChild(cohGroup.release());
2378     }
2379     group->addChild(transGroup.release());
2380 
2381     // Padding tests.
2382     group->addChild(createPaddingTests(testCtx));
2383     // Shared memory layout tests.
2384     group->addChild(createSharedMemoryLayoutTests(testCtx));
2385 
2386     return group.release();
2387 }
2388 
2389 } // namespace MemoryModel
2390 } // namespace vkt
2391