1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2018-2019 NVIDIA Corporation
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Vulkan Memory Model tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMemoryModelTests.hpp"
26 #include "vktMemoryModelPadding.hpp"
27 #include "vktMemoryModelSharedLayout.hpp"
28 #include "vktAmberTestCase.hpp"
29
30 #include "vkBufferWithMemory.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkQueryUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 #include "vktTestCase.hpp"
39
40 #include "deDefs.h"
41 #include "deMath.h"
42 #include "deSharedPtr.hpp"
43 #include "deString.h"
44
45 #include "tcuTestCase.hpp"
46 #include "tcuTestLog.hpp"
47
48 #include <string>
49 #include <sstream>
50
51 namespace vkt
52 {
53 namespace MemoryModel
54 {
55 namespace
56 {
57 using namespace vk;
58 using namespace std;
59
60 typedef enum
61 {
62 TT_MP = 0, // message passing
63 TT_WAR, // write-after-read hazard
64 } TestType;
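// Illustrative summary (see initPrograms below): in a TT_MP test the writer stores the
// payload, performs a release, then sets the guard; the reader acquires the guard and then
// checks that the payload value arrived. In a TT_WAR test the reader loads the payload
// first and then signals through the guard, and the writer stores the payload only after
// observing that signal, verifying that its earlier read still returned the initial value.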
65
66 typedef enum
67 {
68 ST_FENCE_FENCE = 0,
69 ST_FENCE_ATOMIC,
70 ST_ATOMIC_FENCE,
71 ST_ATOMIC_ATOMIC,
72 ST_CONTROL_BARRIER,
73 ST_CONTROL_AND_MEMORY_BARRIER,
74 } SyncType;
75
76 typedef enum
77 {
78 SC_BUFFER = 0,
79 SC_IMAGE,
80 SC_WORKGROUP,
81 SC_PHYSBUFFER,
82 } StorageClass;
83
84 typedef enum
85 {
86 SCOPE_DEVICE = 0,
87 SCOPE_QUEUEFAMILY,
88 SCOPE_WORKGROUP,
89 SCOPE_SUBGROUP,
90 } Scope;
91
92 typedef enum
93 {
94 STAGE_COMPUTE = 0,
95 STAGE_VERTEX,
96 STAGE_FRAGMENT,
97 } Stage;
98
99 typedef enum
100 {
101 DATA_TYPE_UINT = 0,
102 DATA_TYPE_UINT64,
103 DATA_TYPE_FLOAT32,
104 DATA_TYPE_FLOAT64,
105 } DataType;
106
107 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
108 const VkFlags allPipelineStages =
109 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
110
111 struct CaseDef
112 {
113 bool payloadMemLocal;
114 bool guardMemLocal;
115 bool coherent;
116 bool core11;
117 bool atomicRMW;
118 TestType testType;
119 StorageClass payloadSC;
120 StorageClass guardSC;
121 Scope scope;
122 SyncType syncType;
123 Stage stage;
124 DataType dataType;
125 bool transitive;
126 bool transitiveVis;
127 };
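// Note: "payload" is the memory the test actually races on with plain loads/stores, while
// "guard" is the flag used for synchronization and is only ever accessed atomically (see
// the shader generation below). payloadMemLocal/guardMemLocal select device-local versus
// non-device-local backing memory for the corresponding resource.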
128
129 class MemoryModelTestInstance : public TestInstance
130 {
131 public:
132 MemoryModelTestInstance(Context &context, const CaseDef &data);
133 ~MemoryModelTestInstance(void);
134 tcu::TestStatus iterate(void);
135
136 private:
137 CaseDef m_data;
138
139 enum
140 {
141 WIDTH = 256,
142 HEIGHT = 256
143 };
144 };
145
146 MemoryModelTestInstance::MemoryModelTestInstance(Context &context, const CaseDef &data)
147 : vkt::TestInstance(context)
148 , m_data(data)
149 {
150 }
151
152 MemoryModelTestInstance::~MemoryModelTestInstance(void)
153 {
154 }
155
156 class MemoryModelTestCase : public TestCase
157 {
158 public:
159 MemoryModelTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
160 ~MemoryModelTestCase(void);
161 virtual void initPrograms(SourceCollections &programCollection) const;
162 virtual void initProgramsTransitive(SourceCollections &programCollection) const;
163 virtual TestInstance *createInstance(Context &context) const;
164 virtual void checkSupport(Context &context) const;
165
166 private:
167 CaseDef m_data;
168 };
169
170 MemoryModelTestCase::MemoryModelTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
171 : vkt::TestCase(context, name)
172 , m_data(data)
173 {
174 }
175
176 MemoryModelTestCase::~MemoryModelTestCase(void)
177 {
178 }
179
180 void MemoryModelTestCase::checkSupport(Context &context) const
181 {
182 if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
183 {
184 TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
185 }
186
187 if (!m_data.core11)
188 {
189 if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
190 {
191 TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
192 }
193
194 if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
195 {
196 TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
197 }
198 }
199
200 if (m_data.scope == SCOPE_SUBGROUP)
201 {
202 // Check for subgroup support for scope_subgroup tests.
203 VkPhysicalDeviceSubgroupProperties subgroupProperties;
204 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
205 subgroupProperties.pNext = DE_NULL;
206 subgroupProperties.supportedOperations = 0;
207
208 VkPhysicalDeviceProperties2 properties;
209 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
210 properties.pNext = &subgroupProperties;
211
212 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
213
214 if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
215 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
216 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
217 {
218 TCU_THROW(NotSupportedError, "Subgroup features not supported");
219 }
220
221 VkShaderStageFlags stage = VK_SHADER_STAGE_COMPUTE_BIT;
222 if (m_data.stage == STAGE_VERTEX)
223 {
224 stage = VK_SHADER_STAGE_VERTEX_BIT;
225 }
226 else if (m_data.stage == STAGE_COMPUTE)
227 {
228 stage = VK_SHADER_STAGE_COMPUTE_BIT;
229 }
230 else if (m_data.stage == STAGE_FRAGMENT)
231 {
232 stage = VK_SHADER_STAGE_FRAGMENT_BIT;
233 }
234
235 if ((subgroupProperties.supportedStages & stage) == 0)
236 {
237 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
238 }
239 }
240 if (m_data.dataType == DATA_TYPE_UINT64)
241 {
242 if (!context.getDeviceFeatures().shaderInt64)
243 {
244 TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
245 }
246 if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
247 (m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER))
248 {
249 TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
250 }
251 if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics && m_data.guardSC == SC_WORKGROUP)
252 {
253 TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
254 }
255 }
256
257 if (m_data.dataType == DATA_TYPE_FLOAT32)
258 {
259 if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
260 TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
261
262 if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
263 (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics))
264 {
265 TCU_THROW(NotSupportedError,
266 "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
267 }
268
269 if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics))
270 {
271 TCU_THROW(NotSupportedError,
272 "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported");
273 }
274
275 if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics))
276 {
277 TCU_THROW(NotSupportedError,
278 "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
279 }
280 }
281
282 if (m_data.dataType == DATA_TYPE_FLOAT64)
283 {
284 if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
285 TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
286
287 if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
288 (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics))
289 {
290 TCU_THROW(NotSupportedError,
291 "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
292 }
293
294 if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE)
295 {
296 TCU_THROW(NotSupportedError,
297 "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported");
298 }
299
300 if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics))
301 {
302 TCU_THROW(NotSupportedError,
303 "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
304 }
305 }
306
307 if (m_data.transitive && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains)
308 TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported");
309
310 if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
311 !context.isBufferDeviceAddressSupported())
312 TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
313
314 if (m_data.stage == STAGE_VERTEX)
315 {
316 if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
317 {
318 TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
319 }
320 }
321 if (m_data.stage == STAGE_FRAGMENT)
322 {
323 if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
324 {
325 TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
326 }
327 }
328 }
329
330 void MemoryModelTestCase::initPrograms(SourceCollections &programCollection) const
331 {
332 if (m_data.transitive)
333 {
334 initProgramsTransitive(programCollection);
335 return;
336 }
337 DE_ASSERT(!m_data.transitiveVis);
338
339 Scope invocationMapping = m_data.scope;
340 if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
341 (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
342 {
343 invocationMapping = SCOPE_WORKGROUP;
344 }
345
346 const char *scopeStr;
347 switch (m_data.scope)
348 {
349 default:
350 DE_ASSERT(0); // fall through
351 case SCOPE_DEVICE:
352 scopeStr = "gl_ScopeDevice";
353 break;
354 case SCOPE_QUEUEFAMILY:
355 scopeStr = "gl_ScopeQueueFamily";
356 break;
357 case SCOPE_WORKGROUP:
358 scopeStr = "gl_ScopeWorkgroup";
359 break;
360 case SCOPE_SUBGROUP:
361 scopeStr = "gl_ScopeSubgroup";
362 break;
363 }
364
365 const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" :
366 (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
367 (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" :
368 "uint";
369 const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
370
371 // Construct storageSemantics strings. Both release and acquire
372 // always have the payload storage class. They only include the
373 // guard storage class if they're using FENCE for that side of the
374 // sync.
375 std::stringstream storageSemanticsRelease;
376 switch (m_data.payloadSC)
377 {
378 default:
379 DE_ASSERT(0); // fall through
380 case SC_PHYSBUFFER: // fall through
381 case SC_BUFFER:
382 storageSemanticsRelease << "gl_StorageSemanticsBuffer";
383 break;
384 case SC_IMAGE:
385 storageSemanticsRelease << "gl_StorageSemanticsImage";
386 break;
387 case SC_WORKGROUP:
388 storageSemanticsRelease << "gl_StorageSemanticsShared";
389 break;
390 }
391 std::stringstream storageSemanticsAcquire;
392 storageSemanticsAcquire << storageSemanticsRelease.str();
393 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
394 {
395 switch (m_data.guardSC)
396 {
397 default:
398 DE_ASSERT(0); // fall through
399 case SC_PHYSBUFFER: // fall through
400 case SC_BUFFER:
401 storageSemanticsRelease << " | gl_StorageSemanticsBuffer";
402 break;
403 case SC_IMAGE:
404 storageSemanticsRelease << " | gl_StorageSemanticsImage";
405 break;
406 case SC_WORKGROUP:
407 storageSemanticsRelease << " | gl_StorageSemanticsShared";
408 break;
409 }
410 }
411 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
412 {
413 switch (m_data.guardSC)
414 {
415 default:
416 DE_ASSERT(0); // fall through
417 case SC_PHYSBUFFER: // fall through
418 case SC_BUFFER:
419 storageSemanticsAcquire << " | gl_StorageSemanticsBuffer";
420 break;
421 case SC_IMAGE:
422 storageSemanticsAcquire << " | gl_StorageSemanticsImage";
423 break;
424 case SC_WORKGROUP:
425 storageSemanticsAcquire << " | gl_StorageSemanticsShared";
426 break;
427 }
428 }
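    // For illustration (one representative case, not exhaustive): with payloadSC == SC_BUFFER,
    // guardSC == SC_IMAGE and syncType == ST_FENCE_FENCE, both strings end up as
    // "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage"; with ST_ATOMIC_ATOMIC neither
    // string includes the guard storage class.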
429
430 std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
431
432 semanticsRelease << "gl_SemanticsRelease";
433 semanticsAcquire << "gl_SemanticsAcquire";
434 semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
435 if (!m_data.coherent && m_data.testType != TT_WAR)
436 {
437 DE_ASSERT(!m_data.core11);
438 semanticsRelease << " | gl_SemanticsMakeAvailable";
439 semanticsAcquire << " | gl_SemanticsMakeVisible";
440 semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
441 }
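    // When the payload declarations are not coherent, plain loads/stores are not implicitly
    // made available/visible, so the release side must carry gl_SemanticsMakeAvailable and
    // the acquire side gl_SemanticsMakeVisible (added above).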
442
443 std::stringstream css;
444 css << "#version 450 core\n";
445 if (!m_data.core11)
446 {
447 css << "#pragma use_vulkan_memory_model\n";
448 }
449 if (!intType)
450 {
451 css << "#extension GL_EXT_shader_atomic_float : enable\n"
452 "#extension GL_KHR_memory_scope_semantics : enable\n";
453 }
454 css << "#extension GL_KHR_shader_subgroup_basic : enable\n"
455 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
456 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
457 "#extension GL_KHR_memory_scope_semantics : enable\n"
458 "#extension GL_ARB_gpu_shader_int64 : enable\n"
459 "#extension GL_EXT_buffer_reference : enable\n"
460           "// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
461 "layout(constant_id = 0) const int DIM = 1;\n"
462 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
463 "struct S { "
464 << typeStr << " x[DIM*DIM]; };\n";
465
466 if (m_data.stage == STAGE_COMPUTE)
467 {
468 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
469 }
470
471 const char *memqual = "";
472 if (m_data.coherent)
473 {
474 if (m_data.core11)
475 {
476 // Vulkan 1.1 only has "coherent", use it regardless of scope
477 memqual = "coherent";
478 }
479 else
480 {
481 switch (m_data.scope)
482 {
483 default:
484 DE_ASSERT(0); // fall through
485 case SCOPE_DEVICE:
486 memqual = "devicecoherent";
487 break;
488 case SCOPE_QUEUEFAMILY:
489 memqual = "queuefamilycoherent";
490 break;
491 case SCOPE_WORKGROUP:
492 memqual = "workgroupcoherent";
493 break;
494 case SCOPE_SUBGROUP:
495 memqual = "subgroupcoherent";
496 break;
497 }
498 }
499 }
500 else
501 {
502 DE_ASSERT(!m_data.core11);
503 memqual = "nonprivate";
504 }
505
506 stringstream pushConstMembers;
507
508 // Declare payload, guard, and fail resources
509 switch (m_data.payloadSC)
510 {
511 default:
512 DE_ASSERT(0); // fall through
513 case SC_PHYSBUFFER:
514 css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
515 pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n";
516 break;
517 case SC_BUFFER:
518 css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n";
519 break;
520 case SC_IMAGE:
521 if (intType)
522 css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
523 else
524 css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
525 break;
526 case SC_WORKGROUP:
527 css << "shared S payload;\n";
528 break;
529 }
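    // For illustration: with payloadSC == SC_BUFFER, coherent == true, scope == SCOPE_DEVICE
    // and dataType == DATA_TYPE_UINT, the declaration emitted above is
    //   layout(set=0, binding=0) devicecoherent buffer Payload { uint x[]; } payload;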
530 if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
531 {
532 // The guard variable is only accessed with atomics and need not be declared coherent.
533 switch (m_data.guardSC)
534 {
535 default:
536 DE_ASSERT(0); // fall through
537 case SC_PHYSBUFFER:
538 css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
539 pushConstMembers << "layout(offset = 8) GuardRef guard;\n";
540 break;
541 case SC_BUFFER:
542 css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n";
543 break;
544 case SC_IMAGE:
545 if (intType)
546 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
547 else
548 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
549 break;
550 case SC_WORKGROUP:
551 css << "shared S guard;\n";
552 break;
553 }
554 }
555
556 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
557
558 if (pushConstMembers.str().size() != 0)
559 {
560 css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
561 }
562
563 css << "void main()\n"
564 "{\n"
565 " bool pass = true;\n"
566 " bool skip = false;\n";
567
568 if (m_data.payloadSC == SC_PHYSBUFFER)
569 css << " " << memqual << " PayloadRef payload = payloadref;\n";
570
571 if (m_data.stage == STAGE_FRAGMENT)
572 {
573 // Kill helper invocations so they don't load outside the bounds of the SSBO.
574 // Helper pixels are also initially "active" and if a thread gets one as its
575 // partner in SCOPE_SUBGROUP mode, it can't run the test.
576 css << " if (gl_HelperInvocation) { return; }\n";
577 }
578
579 // Compute coordinates based on the storage class and scope.
580 // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
581 // For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
582    // For subgroup scope, we pair up LocalInvocationID and the LocalInvocationID of the invocation at SubgroupInvocationID^(SubgroupSize-1)
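    // Worked example (workgroup scope, DIM == 8): the invocation with LocalInvocationID (1,2)
    // pairs with (8-1-1, 8-1-2) == (6,5); its sharedCoord is 2*8+1 == 17 and its
    // partnerSharedCoord is 5*8+6 == 46.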
583 switch (invocationMapping)
584 {
585 default:
586 DE_ASSERT(0); // fall through
587 case SCOPE_SUBGROUP:
588 // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
589 css << " uvec4 ballot = subgroupBallot(true);\n"
590 " if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
591
592 switch (m_data.stage)
593 {
594 default:
595 DE_ASSERT(0); // fall through
596 case STAGE_COMPUTE:
597 css << " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
598 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
599 " uint sharedCoord = localId.y * DIM + localId.x;\n"
600 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
601 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + "
602 "gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
603 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + "
604 "gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
605 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
606 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
607 break;
608 case STAGE_VERTEX:
609 css << " uint bufferCoord = gl_VertexIndex;\n"
610 " uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
611 " ivec2 imageCoord = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / "
612 "(DIM*NUM_WORKGROUP_EACH_DIM));\n"
613 " ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
614 " gl_PointSize = 1.0f;\n"
615 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
616 break;
617 case STAGE_FRAGMENT:
618 css << " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
619 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
620 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
621 " ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
622 " uint sharedCoord = localId.y * DIM + localId.x;\n"
623 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
624 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
625 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + "
626 "partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
627 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
628 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
629 break;
630 }
631 break;
632 case SCOPE_WORKGROUP:
633 css << " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
634 " ivec2 partnerLocalId = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
635 " uint sharedCoord = localId.y * DIM + localId.x;\n"
636 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
637 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + "
638 "sharedCoord;\n"
639 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + "
640 "partnerSharedCoord;\n"
641 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
642 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
643 break;
644 case SCOPE_QUEUEFAMILY:
645 case SCOPE_DEVICE:
646 switch (m_data.stage)
647 {
648 default:
649 DE_ASSERT(0); // fall through
650 case STAGE_COMPUTE:
651 css << " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
652 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - "
653 "ivec2(gl_GlobalInvocationID.xy);\n"
654 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
655 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
656 " ivec2 imageCoord = globalId;\n"
657 " ivec2 partnerImageCoord = partnerGlobalId;\n";
658 break;
659 case STAGE_VERTEX:
660 css << " ivec2 globalId = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / "
661 "(DIM*NUM_WORKGROUP_EACH_DIM));\n"
662 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
663 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
664 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
665 " ivec2 imageCoord = globalId;\n"
666 " ivec2 partnerImageCoord = partnerGlobalId;\n"
667 " gl_PointSize = 1.0f;\n"
668 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
669 break;
670 case STAGE_FRAGMENT:
671 css << " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
672 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
673 " ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
674 " ivec2 partnerGroupId = groupId;\n"
675 " uint sharedCoord = localId.y * DIM + localId.x;\n"
676 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
677 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
678 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + "
679 "partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
680 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
681 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
682 break;
683 }
684 break;
685 }
686
687 // Initialize shared memory, followed by a barrier
688 if (m_data.payloadSC == SC_WORKGROUP)
689 {
690 css << " payload.x[sharedCoord] = 0;\n";
691 }
692 if (m_data.guardSC == SC_WORKGROUP)
693 {
694 css << " guard.x[sharedCoord] = 0;\n";
695 }
696 if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
697 {
698 switch (invocationMapping)
699 {
700 default:
701 DE_ASSERT(0); // fall through
702 case SCOPE_SUBGROUP:
703 css << " subgroupBarrier();\n";
704 break;
705 case SCOPE_WORKGROUP:
706 css << " barrier();\n";
707 break;
708 }
709 }
710
711 if (m_data.testType == TT_MP)
712 {
713 if (intType)
714 {
715 // Store payload
716 switch (m_data.payloadSC)
717 {
718 default:
719 DE_ASSERT(0); // fall through
720 case SC_PHYSBUFFER: // fall through
721 case SC_BUFFER:
722 css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n";
723 break;
724 case SC_IMAGE:
725 css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, "
726 "partnerImageCoord).x>>31), 0, 0, 0));\n";
727 break;
728 case SC_WORKGROUP:
729 css << " payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n";
730 break;
731 }
732 }
733 else
734 {
735 // Store payload
736 switch (m_data.payloadSC)
737 {
738 default:
739 DE_ASSERT(0); // fall through
740 case SC_PHYSBUFFER: // fall through
741 case SC_BUFFER:
742 css << " payload.x[bufferCoord] = " << typeStr
743 << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n";
744 break;
745 case SC_IMAGE:
746 css << " imageStore(payload, imageCoord, vec4(" << typeStr
747 << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, "
748 "0)); \n";
749 break;
750 case SC_WORKGROUP:
751 css << " payload.x[sharedCoord] = " << typeStr
752 << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n";
753 break;
754 }
755 }
756 }
757 else
758 {
759 DE_ASSERT(m_data.testType == TT_WAR);
760 // Load payload
761 switch (m_data.payloadSC)
762 {
763 default:
764 DE_ASSERT(0); // fall through
765 case SC_PHYSBUFFER: // fall through
766 case SC_BUFFER:
767 css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n";
768 break;
769 case SC_IMAGE:
770 css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n";
771 break;
772 case SC_WORKGROUP:
773 css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n";
774 break;
775 }
776 }
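    // For illustration (one representative case): with syncType == ST_FENCE_FENCE,
    // payloadSC == SC_BUFFER, guardSC == SC_IMAGE, scope == SCOPE_DEVICE, coherent == true,
    // atomicRMW == false and dataType == DATA_TYPE_UINT, the synchronization emitted below is
    // roughly:
    //   memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsImage, gl_SemanticsRelease);
    //   imageAtomicStore(guard, imageCoord, uint(1u), gl_ScopeDevice, 0, 0);
    //   skip = imageAtomicLoad(guard, partnerImageCoord, gl_ScopeDevice, 0, 0) == 0;
    //   memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsImage, gl_SemanticsAcquire);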
777 if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
778 {
779 // Acquire and release separate from control barrier
780 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", "
781 << semanticsRelease.str()
782 << ");\n"
783 " controlBarrier("
784 << scopeStr
785 << ", gl_ScopeInvocation, 0, 0);\n"
786 " memoryBarrier("
787 << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
788 }
789 else if (m_data.syncType == ST_CONTROL_BARRIER)
790 {
791 // Control barrier performs both acquire and release
792 css << " controlBarrier(" << scopeStr << ", " << scopeStr << ", " << storageSemanticsRelease.str() << " | "
793 << storageSemanticsAcquire.str() << ", " << semanticsAcquireRelease.str() << ");\n";
794 }
795 else
796 {
797 // Don't type cast for 64 bit image atomics
798 const char *typeCastStr =
799 (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
800 // Release barrier
801 std::stringstream atomicReleaseSemantics;
802 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
803 {
804 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", "
805 << semanticsRelease.str() << ");\n";
806 atomicReleaseSemantics << ", 0, 0";
807 }
808 else
809 {
810 atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
811 }
812 // Atomic store guard
813 if (m_data.atomicRMW)
814 {
815 switch (m_data.guardSC)
816 {
817 default:
818 DE_ASSERT(0); // fall through
819 case SC_PHYSBUFFER: // fall through
820 case SC_BUFFER:
821 css << " atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr
822 << atomicReleaseSemantics.str() << ");\n";
823 break;
824 case SC_IMAGE:
825 css << " imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr
826 << atomicReleaseSemantics.str() << ");\n";
827 break;
828 case SC_WORKGROUP:
829 css << " atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr
830 << atomicReleaseSemantics.str() << ");\n";
831 break;
832 }
833 }
834 else
835 {
836 switch (m_data.guardSC)
837 {
838 default:
839 DE_ASSERT(0); // fall through
840 case SC_PHYSBUFFER: // fall through
841 case SC_BUFFER:
842 css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr
843 << atomicReleaseSemantics.str() << ");\n";
844 break;
845 case SC_IMAGE:
846 css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr
847 << atomicReleaseSemantics.str() << ");\n";
848 break;
849 case SC_WORKGROUP:
850 css << " atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr
851 << atomicReleaseSemantics.str() << ");\n";
852 break;
853 }
854 }
855
856 std::stringstream atomicAcquireSemantics;
857 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
858 {
859 atomicAcquireSemantics << ", 0, 0";
860 }
861 else
862 {
863 atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
864 }
865 // Atomic load guard
866 if (m_data.atomicRMW)
867 {
868 switch (m_data.guardSC)
869 {
870 default:
871 DE_ASSERT(0); // fall through
872 case SC_PHYSBUFFER: // fall through
873 case SC_BUFFER:
874 css << " skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr
875 << atomicAcquireSemantics.str() << ") == 0;\n";
876 break;
877 case SC_IMAGE:
878 css << " skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr
879 << atomicAcquireSemantics.str() << ") == 0;\n";
880 break;
881 case SC_WORKGROUP:
882 css << " skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr
883 << atomicAcquireSemantics.str() << ") == 0;\n";
884 break;
885 }
886 }
887 else
888 {
889 switch (m_data.guardSC)
890 {
891 default:
892 DE_ASSERT(0); // fall through
893 case SC_PHYSBUFFER: // fall through
894 case SC_BUFFER:
895 css << " skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str()
896 << ") == 0;\n";
897 break;
898 case SC_IMAGE:
899 css << " skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr
900 << atomicAcquireSemantics.str() << ") == 0;\n";
901 break;
902 case SC_WORKGROUP:
903 css << " skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str()
904 << ") == 0;\n";
905 break;
906 }
907 }
908 // Acquire barrier
909 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
910 {
911 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", "
912 << semanticsAcquire.str() << ");\n";
913 }
914 }
915 if (m_data.testType == TT_MP)
916 {
917 // Load payload
918 switch (m_data.payloadSC)
919 {
920 default:
921 DE_ASSERT(0); // fall through
922 case SC_PHYSBUFFER: // fall through
923 case SC_BUFFER:
924 css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n";
925 break;
926 case SC_IMAGE:
927 css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n";
928 break;
929 case SC_WORKGROUP:
930 css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n";
931 break;
932 }
933 css << " if (!skip && r != " << typeStr
934 << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
935 "}\n";
936 }
937 else
938 {
939 DE_ASSERT(m_data.testType == TT_WAR);
940 // Store payload, only if the partner invocation has already done its read
941 css << " if (!skip) {\n ";
942 switch (m_data.payloadSC)
943 {
944 default:
945 DE_ASSERT(0); // fall through
946 case SC_PHYSBUFFER: // fall through
947 case SC_BUFFER:
948 css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n";
949 break;
950 case SC_IMAGE:
951 if (intType)
952 {
953 css << " imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n";
954 }
955 else
956 {
957 css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n";
958 }
959 break;
960 case SC_WORKGROUP:
961 css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n";
962 break;
963 }
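    // The check emitted below verifies that the pre-synchronization read r is still the
    // initial value (0): the partner writes its payload only after it has observed this
    // invocation's guard signal, and that signal is released only after the read above.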
964 css << " }\n"
965 " if (r != 0) { fail.x[bufferCoord] = 1; }\n"
966 "}\n";
967 }
968
969 // Draw a fullscreen triangle strip based on gl_VertexIndex
970 std::stringstream vss;
971 vss << "#version 450 core\n"
972        "vec2 coords[4] = {vec2(-1,-1), vec2(-1, 1), vec2(1, -1), vec2(1, 1)};\n"
973 "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
974
975 const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
976
977 switch (m_data.stage)
978 {
979 default:
980 DE_ASSERT(0); // fall through
981 case STAGE_COMPUTE:
982 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
983 break;
984 case STAGE_VERTEX:
985 programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
986 break;
987 case STAGE_FRAGMENT:
988 programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
989 programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
990 break;
991 }
992 }
993
994 void MemoryModelTestCase::initProgramsTransitive(SourceCollections &programCollection) const
995 {
996 Scope invocationMapping = m_data.scope;
997
998 const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" :
999 (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
1000 (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" :
1001 "uint";
1002 const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
1003
1004    // Construct storageSemantics strings. The transitive variant builds
1005    // separate strings for the payload and guard storage classes, plus a
1006    // combined string used by the device-scope fences, which must cover
1007    // both storage classes.
1008 std::stringstream storageSemanticsPayload;
1009 switch (m_data.payloadSC)
1010 {
1011 default:
1012 DE_ASSERT(0); // fall through
1013 case SC_PHYSBUFFER: // fall through
1014 case SC_BUFFER:
1015 storageSemanticsPayload << "gl_StorageSemanticsBuffer";
1016 break;
1017 case SC_IMAGE:
1018 storageSemanticsPayload << "gl_StorageSemanticsImage";
1019 break;
1020 }
1021 std::stringstream storageSemanticsGuard;
1022 switch (m_data.guardSC)
1023 {
1024 default:
1025 DE_ASSERT(0); // fall through
1026 case SC_PHYSBUFFER: // fall through
1027 case SC_BUFFER:
1028 storageSemanticsGuard << "gl_StorageSemanticsBuffer";
1029 break;
1030 case SC_IMAGE:
1031 storageSemanticsGuard << "gl_StorageSemanticsImage";
1032 break;
1033 }
1034 std::stringstream storageSemanticsAll;
1035 storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str();
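    // For illustration: with payloadSC == SC_BUFFER and guardSC == SC_IMAGE this yields
    //   storageSemanticsPayload == "gl_StorageSemanticsBuffer"
    //   storageSemanticsGuard   == "gl_StorageSemanticsImage"
    //   storageSemanticsAll     == "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage"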
1036
1037 std::stringstream css;
1038 css << "#version 450 core\n";
1039 css << "#pragma use_vulkan_memory_model\n";
1040 if (!intType)
1041 {
1042 css << "#extension GL_EXT_shader_atomic_float : enable\n"
1043 "#extension GL_KHR_memory_scope_semantics : enable\n";
1044 }
1045 css << "#extension GL_KHR_shader_subgroup_basic : enable\n"
1046 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
1047 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
1048 "#extension GL_KHR_memory_scope_semantics : enable\n"
1049 "#extension GL_ARB_gpu_shader_int64 : enable\n"
1050 "#extension GL_EXT_buffer_reference : enable\n"
1051        "// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
1052 "layout(constant_id = 0) const int DIM = 1;\n"
1053 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
1054 "shared bool sharedSkip;\n";
1055
1056 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
1057
1058 const char *memqual = "";
1059 const char *semAvail = "";
1060 const char *semVis = "";
1061 if (m_data.coherent)
1062 {
1063 memqual = "workgroupcoherent";
1064 }
1065 else
1066 {
1067 memqual = "nonprivate";
1068 semAvail = " | gl_SemanticsMakeAvailable";
1069 semVis = " | gl_SemanticsMakeVisible";
1070 }
1071
1072 stringstream pushConstMembers;
1073
1074 // Declare payload, guard, and fail resources
1075 switch (m_data.payloadSC)
1076 {
1077 default:
1078 DE_ASSERT(0); // fall through
1079 case SC_PHYSBUFFER:
1080 css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
1081 pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n";
1082 break;
1083 case SC_BUFFER:
1084 css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n";
1085 break;
1086 case SC_IMAGE:
1087 if (intType)
1088 css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
1089 else
1090 css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
1091 break;
1092 }
1093 // The guard variable is only accessed with atomics and need not be declared coherent.
1094 switch (m_data.guardSC)
1095 {
1096 default:
1097 DE_ASSERT(0); // fall through
1098 case SC_PHYSBUFFER:
1099 css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
1100 pushConstMembers << "layout(offset = 8) GuardRef guard;\n";
1101 break;
1102 case SC_BUFFER:
1103 css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n";
1104 break;
1105 case SC_IMAGE:
1106 if (intType)
1107 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
1108 else
1109 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
1110 break;
1111 }
1112
1113 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
1114
1115 if (pushConstMembers.str().size() != 0)
1116 {
1117 css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
1118 }
1119
1120 css << "void main()\n"
1121 "{\n"
1122 " bool pass = true;\n"
1123 " bool skip = false;\n"
1124 " sharedSkip = false;\n";
1125
1126 if (m_data.payloadSC == SC_PHYSBUFFER)
1127 css << " " << memqual << " PayloadRef payload = payloadref;\n";
1128
1129 // Compute coordinates based on the storage class and scope.
1130 switch (invocationMapping)
1131 {
1132 default:
1133 DE_ASSERT(0); // fall through
1134 case SCOPE_DEVICE:
1135 css << " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
1136 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
1137 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
1138 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
1139 " ivec2 imageCoord = globalId;\n"
1140 " ivec2 partnerImageCoord = partnerGlobalId;\n"
1141 " ivec2 globalId00 = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n"
1142 " ivec2 partnerGlobalId00 = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - "
1143 "ivec2(gl_WorkGroupID.xy));\n"
1144 " uint bufferCoord00 = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n"
1145 " uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + "
1146 "partnerGlobalId00.x;\n"
1147 " ivec2 imageCoord00 = globalId00;\n"
1148 " ivec2 partnerImageCoord00 = partnerGlobalId00;\n";
1149 break;
1150 }
1151
1152 // Store payload
1153 if (intType)
1154 {
1155 switch (m_data.payloadSC)
1156 {
1157 default:
1158 DE_ASSERT(0); // fall through
1159 case SC_PHYSBUFFER: // fall through
1160 case SC_BUFFER:
1161 css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n";
1162 break;
1163 case SC_IMAGE:
1164 css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, "
1165 "partnerImageCoord).x>>31), 0, 0, 0));\n";
1166 break;
1167 }
1168 }
1169 else
1170 {
1171 switch (m_data.payloadSC)
1172 {
1173 default:
1174 DE_ASSERT(0); // fall through
1175 case SC_PHYSBUFFER: // fall through
1176 case SC_BUFFER:
1177 css << " payload.x[bufferCoord] = " << typeStr
1178 << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n";
1179 break;
1180 case SC_IMAGE:
1181 css << " imageStore(payload, imageCoord, vec4(" << typeStr
1182 << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x)>>31))), 0, 0, 0)); "
1183 "\n";
1184 break;
1185 }
1186 }
1187
1188 // Sync to other threads in the workgroup
1189 css << " controlBarrier(gl_ScopeWorkgroup, "
1190 "gl_ScopeWorkgroup, "
1191 << storageSemanticsPayload.str()
1192 << " | gl_StorageSemanticsShared, "
1193 "gl_SemanticsAcquireRelease"
1194 << semAvail << ");\n";
1195
1196 // Device-scope release/availability in invocation(0,0)
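    // Transitivity chain being exercised: every invocation releases its payload write to the
    // workgroup (control barrier above); invocation (0,0) then makes the workgroup's writes
    // available at device scope; the partner workgroup acquires at device scope (only in its
    // own (0,0) when transitiveVis is set, otherwise in every invocation); and in the
    // transitiveVis case a workgroup-scope barrier below makes the data visible to the rest
    // of the workgroup before each invocation loads its partner's payload.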
1197 css << " if (all(equal(gl_LocalInvocationID.xy, ivec2(0,0)))) {\n";
1198 const char *typeCastStr =
1199 (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
1200 if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE)
1201 {
1202 switch (m_data.guardSC)
1203 {
1204 default:
1205 DE_ASSERT(0); // fall through
1206 case SC_PHYSBUFFER: // fall through
1207 case SC_BUFFER:
1208 css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, "
1209 << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1210 break;
1211 case SC_IMAGE:
1212 css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, "
1213 << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1214 break;
1215 }
1216 }
1217 else
1218 {
1219 css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str()
1220 << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1221 switch (m_data.guardSC)
1222 {
1223 default:
1224 DE_ASSERT(0); // fall through
1225 case SC_PHYSBUFFER: // fall through
1226 case SC_BUFFER:
1227 css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n";
1228 break;
1229 case SC_IMAGE:
1230 css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n";
1231 break;
1232 }
1233 }
1234
1235 // Device-scope acquire/visibility either in invocation(0,0) or in every invocation
1236 if (!m_data.transitiveVis)
1237 {
1238 css << " }\n";
1239 }
1240 if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC)
1241 {
1242 switch (m_data.guardSC)
1243 {
1244 default:
1245 DE_ASSERT(0); // fall through
1246 case SC_PHYSBUFFER: // fall through
1247 case SC_BUFFER:
1248 css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, "
1249 << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n";
1250 break;
1251 case SC_IMAGE:
1252 css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, "
1253 << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n";
1254 break;
1255 }
1256 }
1257 else
1258 {
1259 switch (m_data.guardSC)
1260 {
1261 default:
1262 DE_ASSERT(0); // fall through
1263 case SC_PHYSBUFFER: // fall through
1264 case SC_BUFFER:
1265 css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n";
1266 break;
1267 case SC_IMAGE:
1268 css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n";
1269 break;
1270 }
1271 css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str()
1272 << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n";
1273 }
1274
1275 // If invocation(0,0) did the acquire then store "skip" to shared memory and
1276 // synchronize with the workgroup
1277 if (m_data.transitiveVis)
1278 {
1279 css << " sharedSkip = skip;\n";
1280 css << " }\n";
1281
1282 css << " controlBarrier(gl_ScopeWorkgroup, "
1283 "gl_ScopeWorkgroup, "
1284 << storageSemanticsPayload.str()
1285 << " | gl_StorageSemanticsShared, "
1286 "gl_SemanticsAcquireRelease"
1287 << semVis << ");\n";
1288 css << " skip = sharedSkip;\n";
1289 }
1290
1291 // Load payload
1292 switch (m_data.payloadSC)
1293 {
1294 default:
1295 DE_ASSERT(0); // fall through
1296 case SC_PHYSBUFFER: // fall through
1297 case SC_BUFFER:
1298 css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n";
1299 break;
1300 case SC_IMAGE:
1301 css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n";
1302 break;
1303 }
1304 css << " if (!skip && r != " << typeStr
1305 << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
1306 "}\n";
1307
1308 const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
1309
1310 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
1311 }
1312
1313 TestInstance *MemoryModelTestCase::createInstance(Context &context) const
1314 {
1315 return new MemoryModelTestInstance(context, m_data);
1316 }
1317
1318 tcu::TestStatus MemoryModelTestInstance::iterate(void)
1319 {
1320 const DeviceInterface &vk = m_context.getDeviceInterface();
1321 const VkDevice device = m_context.getDevice();
1322 Allocator &allocator = m_context.getDefaultAllocator();
1323
1324 VkPhysicalDeviceProperties2 properties;
1325 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1326 properties.pNext = NULL;
1327
1328 m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
1329
1330 uint32_t DIM = 31;
1331 uint32_t NUM_WORKGROUP_EACH_DIM = 8;
1332 // If necessary, shrink workgroup size to fit HW limits
1333 if (DIM * DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
1334 {
1335 DIM = (uint32_t)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
1336 }
1337 uint32_t NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
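    // Worked example: with the defaults above (DIM == 31, NUM_WORKGROUP_EACH_DIM == 8) this is
    // 31*31*8*8 == 61504 invocations; a device limit of 256 invocations per workgroup would
    // shrink DIM to 16, giving 16*16*8*8 == 16384.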
1338
1339 VkDeviceSize bufferSizes[3];
1340 de::MovePtr<BufferWithMemory> buffers[3];
1341 vk::VkDescriptorBufferInfo bufferDescriptors[3];
1342 de::MovePtr<BufferWithMemory> copyBuffer;
1343
1344 for (uint32_t i = 0; i < 3; ++i)
1345 {
1346 size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ?
1347 sizeof(uint64_t) :
1348 sizeof(uint32_t);
1349 // buffer2 is the "fail" buffer, and is always uint
1350 if (i == 2)
1351 elementSize = sizeof(uint32_t);
1352 bufferSizes[i] = NUM_INVOCATIONS * elementSize;
1353
1354 vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1355 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1356
1357 bool memoryDeviceAddress = false;
1358
1359 bool local;
1360 switch (i)
1361 {
1362 default:
1363 DE_ASSERT(0); // fall through
1364 case 0:
1365 if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER)
1366 continue;
1367 local = m_data.payloadMemLocal;
1368 if (m_data.payloadSC == SC_PHYSBUFFER)
1369 {
1370 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1371 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1372 memoryDeviceAddress = true;
1373 }
1374 break;
1375 case 1:
1376 if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER)
1377 continue;
1378 local = m_data.guardMemLocal;
1379 if (m_data.guardSC == SC_PHYSBUFFER)
1380 {
1381 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1382 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1383 memoryDeviceAddress = true;
1384 }
1385 break;
1386 case 2:
1387 local = true;
1388 break;
1389 }
1390
1391 try
1392 {
1393 buffers[i] = de::MovePtr<BufferWithMemory>(
1394 new BufferWithMemory(vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags),
1395 (memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
1396 (local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)));
1397 }
1398 catch (const tcu::NotSupportedError &)
1399 {
1400 if (!local)
1401 {
1402 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1403 }
1404 throw;
1405 }
1406 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
1407 }
1408
1409 // Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
1410 try
1411 {
1412 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1413 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT),
1414 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1415 }
1416 catch (const tcu::NotSupportedError &)
1417 {
1418 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1419 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT),
1420 MemoryRequirement::HostVisible));
1421 }
1422
1423 VkFormat imageFormat;
1424 switch (m_data.dataType)
1425 {
1426 case DATA_TYPE_UINT:
1427 case DATA_TYPE_UINT64:
1428 imageFormat = VK_FORMAT_R32_UINT;
1429 break;
1430 case DATA_TYPE_FLOAT32:
1431 case DATA_TYPE_FLOAT64:
1432 imageFormat = VK_FORMAT_R32_SFLOAT;
1433 break;
1434 default:
1435 TCU_FAIL("Invalid data type.");
1436 }
1437
1438 const VkImageCreateInfo imageCreateInfo = {
1439 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1440 DE_NULL, // const void* pNext;
1441 (VkImageCreateFlags)0u, // VkImageCreateFlags flags;
1442 VK_IMAGE_TYPE_2D, // VkImageType imageType;
1443 imageFormat, // VkFormat format;
1444 {
1445 DIM * NUM_WORKGROUP_EACH_DIM, // uint32_t width;
1446 DIM * NUM_WORKGROUP_EACH_DIM, // uint32_t height;
1447 1u // uint32_t depth;
1448 }, // VkExtent3D extent;
1449 1u, // uint32_t mipLevels;
1450 1u, // uint32_t arrayLayers;
1451 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1452 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1453 VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
1454 VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
1455 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1456 0u, // uint32_t queueFamilyIndexCount;
1457 DE_NULL, // const uint32_t* pQueueFamilyIndices;
1458 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
1459 };
1460 VkImageViewCreateInfo imageViewCreateInfo = {
1461 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1462 DE_NULL, // const void* pNext;
1463 (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags;
1464 DE_NULL, // VkImage image;
1465 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
1466 imageFormat, // VkFormat format;
1467 {
1468 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1469 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1470 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1471 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
1472 }, // VkComponentMapping components;
1473 {
1474 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1475 0u, // uint32_t baseMipLevel;
1476 1u, // uint32_t levelCount;
1477 0u, // uint32_t baseArrayLayer;
1478 1u // uint32_t layerCount;
1479 } // VkImageSubresourceRange subresourceRange;
1480 };
1481
1482 de::MovePtr<ImageWithMemory> images[2];
1483 Move<VkImageView> imageViews[2];
1484 vk::VkDescriptorImageInfo imageDescriptors[2];
1485
1486 for (uint32_t i = 0; i < 2; ++i)
1487 {
1488
1489 bool local;
1490 switch (i)
1491 {
1492 default:
1493 DE_ASSERT(0); // fall through
1494 case 0:
1495 if (m_data.payloadSC != SC_IMAGE)
1496 continue;
1497 local = m_data.payloadMemLocal;
1498 break;
1499 case 1:
1500 if (m_data.guardSC != SC_IMAGE)
1501 continue;
1502 local = m_data.guardMemLocal;
1503 break;
1504 }
1505
1506 try
1507 {
1508 images[i] = de::MovePtr<ImageWithMemory>(
1509 new ImageWithMemory(vk, device, allocator, imageCreateInfo,
1510 local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
1511 }
1512 catch (const tcu::NotSupportedError &)
1513 {
1514 if (!local)
1515 {
1516 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1517 }
1518 throw;
1519 }
1520 imageViewCreateInfo.image = **images[i];
1521 imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
1522
1523 imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
1524 }
1525
1526 vk::DescriptorSetLayoutBuilder layoutBuilder;
1527
1528 switch (m_data.payloadSC)
1529 {
1530 default:
1531 case SC_BUFFER:
1532 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1533 break;
1534 case SC_IMAGE:
1535 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages);
1536 break;
1537 }
1538 switch (m_data.guardSC)
1539 {
1540 default:
1541 case SC_BUFFER:
1542 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1543 break;
1544 case SC_IMAGE:
1545 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages);
1546 break;
1547 }
1548 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1549
1550 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1551
1552 vk::Unique<vk::VkDescriptorPool> descriptorPool(
1553 vk::DescriptorPoolBuilder()
1554 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
1555 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
1556 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1557 vk::Unique<vk::VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1558
1559 vk::DescriptorSetUpdateBuilder setUpdateBuilder;
1560 switch (m_data.payloadSC)
1561 {
1562 default:
1563 DE_ASSERT(0); // fall through
1564 case SC_PHYSBUFFER:
1565 case SC_WORKGROUP:
1566 break;
1567 case SC_BUFFER:
1568 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1569 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1570 break;
1571 case SC_IMAGE:
1572 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1573 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
1574 break;
1575 }
1576 switch (m_data.guardSC)
1577 {
1578 default:
1579 DE_ASSERT(0); // fall through
1580 case SC_PHYSBUFFER:
1581 case SC_WORKGROUP:
1582 break;
1583 case SC_BUFFER:
1584 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1585 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
1586 break;
1587 case SC_IMAGE:
1588 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1589 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
1590 break;
1591 }
1592 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
1593 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
1594
1595 setUpdateBuilder.update(vk, device);
1596
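    // The 16-byte push constant range covers two 8-byte buffer_reference device addresses,
    // matching the shader's push-constant block (payloadref at offset 0, guard at offset 8),
    // which is only declared when a PHYSBUFFER storage class is in use.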
1597 const VkPushConstantRange pushConstRange = {
1598 allShaderStages, // VkShaderStageFlags stageFlags
1599 0, // uint32_t offset
1600 16 // uint32_t size
1601 };
1602
1603 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
1604 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
1605 DE_NULL, // pNext
1606 (VkPipelineLayoutCreateFlags)0,
1607 1, // setLayoutCount
1608 &descriptorSetLayout.get(), // pSetLayouts
1609 1u, // pushConstantRangeCount
1610 &pushConstRange, // pPushConstantRanges
1611 };
1612
1613 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1614
1615 Move<VkPipeline> pipeline;
1616 Move<VkRenderPass> renderPass;
1617 Move<VkFramebuffer> framebuffer;
1618
1619 VkPipelineBindPoint bindPoint =
1620 m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
1621
1622 const uint32_t specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
1623
1624    const vk::VkSpecializationMapEntry entries[2] = {
1625 {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
1626 {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
1627 };
1628
1629 const vk::VkSpecializationInfo specInfo = {
1630 2, // mapEntryCount
1631 entries, // pMapEntries
1632 sizeof(specData), // dataSize
1633 specData // pData
1634 };
1635
1636 if (m_data.stage == STAGE_COMPUTE)
1637 {
1638 const Unique<VkShaderModule> shader(
1639 createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
1640
1641 const VkPipelineShaderStageCreateInfo shaderCreateInfo = {
1642 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1643 DE_NULL,
1644 (VkPipelineShaderStageCreateFlags)0,
1645 VK_SHADER_STAGE_COMPUTE_BIT, // stage
1646 *shader, // shader
1647 "main",
1648 &specInfo, // pSpecializationInfo
1649 };
1650
1651 const VkComputePipelineCreateInfo pipelineCreateInfo = {
1652 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1653 DE_NULL,
1654 0u, // flags
1655 shaderCreateInfo, // cs
1656 *pipelineLayout, // layout
1657 (vk::VkPipeline)0, // basePipelineHandle
1658 0u, // basePipelineIndex
1659 };
1660 pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1661 }
1662 else
1663 {
1664
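// The shaders report results through storage buffers/images rather than color attachments,
// so the render pass and framebuffer are created with no attachments.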
1665 const vk::VkSubpassDescription subpassDesc = {
1666 (vk::VkSubpassDescriptionFlags)0,
1667 vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
1668 0u, // inputCount
1669 DE_NULL, // pInputAttachments
1670 0u, // colorCount
1671 DE_NULL, // pColorAttachments
1672 DE_NULL, // pResolveAttachments
1673 DE_NULL, // depthStencilAttachment
1674 0u, // preserveCount
1675 DE_NULL, // pPreserveAttachments
1676
1677 };
1678 const vk::VkRenderPassCreateInfo renderPassParams = {
1679 vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
1680 DE_NULL, // pNext
1681 (vk::VkRenderPassCreateFlags)0,
1682 0u, // attachmentCount
1683 DE_NULL, // pAttachments
1684 1u, // subpassCount
1685 &subpassDesc, // pSubpasses
1686 0u, // dependencyCount
1687 DE_NULL, // pDependencies
1688 };
1689
1690 renderPass = createRenderPass(vk, device, &renderPassParams);
1691
1692 const vk::VkFramebufferCreateInfo framebufferParams = {
1693 vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
1694 DE_NULL, // pNext
1695 (vk::VkFramebufferCreateFlags)0,
1696 *renderPass, // renderPass
1697 0u, // attachmentCount
1698 DE_NULL, // pAttachments
1699 DIM * NUM_WORKGROUP_EACH_DIM, // width
1700 DIM * NUM_WORKGROUP_EACH_DIM, // height
1701 1u, // layers
1702 };
1703
1704 framebuffer = createFramebuffer(vk, device, &framebufferParams);
1705
1706 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
1707 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1708 DE_NULL, // const void* pNext;
1709 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1710 0u, // uint32_t vertexBindingDescriptionCount;
1711 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1712 0u, // uint32_t vertexAttributeDescriptionCount;
1713 DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1714 };
1715
1716 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
1717 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
1718 DE_NULL, // const void* pNext;
1719 (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
1720 (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST :
1721 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
1722 VK_FALSE // VkBool32 primitiveRestartEnable;
1723 };
1724
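// The vertex-stage variant draws a point list with rasterizer discard enabled, so only the
// vertex shader runs; the fragment-stage variant draws a single quad (4-vertex triangle strip).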
1725 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
1726 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1727 DE_NULL, // const void* pNext;
1728 (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
1729 VK_FALSE, // VkBool32 depthClampEnable;
1730 (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
1731 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
1732 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
1733 VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
1734 VK_FALSE, // VkBool32 depthBiasEnable;
1735 0.0f, // float depthBiasConstantFactor;
1736 0.0f, // float depthBiasClamp;
1737 0.0f, // float depthBiasSlopeFactor;
1738 1.0f // float lineWidth;
1739 };
1740
1741 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
1742 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
1743 DE_NULL, // const void* pNext
1744 0u, // VkPipelineMultisampleStateCreateFlags flags
1745 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
1746 VK_FALSE, // VkBool32 sampleShadingEnable
1747 1.0f, // float minSampleShading
1748 DE_NULL, // const VkSampleMask* pSampleMask
1749 VK_FALSE, // VkBool32 alphaToCoverageEnable
1750 VK_FALSE // VkBool32 alphaToOneEnable
1751 };
1752
1753 VkViewport viewport = makeViewport(DIM * NUM_WORKGROUP_EACH_DIM, DIM * NUM_WORKGROUP_EACH_DIM);
1754 VkRect2D scissor = makeRect2D(DIM * NUM_WORKGROUP_EACH_DIM, DIM * NUM_WORKGROUP_EACH_DIM);
1755
1756 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
1757 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
1758 DE_NULL, // const void* pNext
1759 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
1760 1u, // uint32_t viewportCount
1761 &viewport, // const VkViewport* pViewports
1762 1u, // uint32_t scissorCount
1763 &scissor // const VkRect2D* pScissors
1764 };
1765
1766 Move<VkShaderModule> fs;
1767 Move<VkShaderModule> vs;
1768
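// Vertex-stage tests use the "test" shader as the vertex shader with a single pipeline stage
// (the fragment module created here is unused); fragment-stage tests pair the "vert" vertex
// shader with the "test" fragment shader.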
1769 uint32_t numStages;
1770 if (m_data.stage == STAGE_VERTEX)
1771 {
1772 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1773 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1774 numStages = 1u;
1775 }
1776 else
1777 {
1778 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1779 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1780 numStages = 2u;
1781 }
1782
1783 const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
1784 {
1785 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1786 VK_SHADER_STAGE_VERTEX_BIT, // stage
1787 *vs, // shader
1788 "main",
1789 &specInfo, // pSpecializationInfo
1790 },
1791 {
1792 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1793 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
1794 *fs, // shader
1795 "main",
1796 &specInfo, // pSpecializationInfo
1797 }};
1798
1799 const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
1800 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
1801 DE_NULL, // const void* pNext;
1802 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
1803 numStages, // uint32_t stageCount;
1804 &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages;
1805 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
1806 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
1807 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
1808 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
1809 &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
1810 &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
1811 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
1812 DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
1813 DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
1814 pipelineLayout.get(), // VkPipelineLayout layout;
1815 renderPass.get(), // VkRenderPass renderPass;
1816 0u, // uint32_t subpass;
1817 DE_NULL, // VkPipeline basePipelineHandle;
1818 0 // int basePipelineIndex;
1819 };
1820
1821 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1822 }
1823
1824 const VkQueue queue = m_context.getUniversalQueue();
1825 Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1826 m_context.getUniversalQueueFamilyIndex());
1827 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1828
1829 VkBufferDeviceAddressInfo addrInfo = {
1830 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
1831 DE_NULL, // const void* pNext;
1832 0, // VkBuffer buffer
1833 };
1834
1835 VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1836 VkClearValue clearColor = makeClearValueColorU32(0, 0, 0, 0);
1837
1838 VkMemoryBarrier memBarrier = {
1839 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1840 DE_NULL, // pNext
1841 0u, // srcAccessMask
1842 0u, // dstAccessMask
1843 };
1844
1845 const VkBufferCopy copyParams = {
1846 (VkDeviceSize)0u, // srcOffset
1847 (VkDeviceSize)0u, // dstOffset
1848 bufferSizes[2] // size
1849 };
1850
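// Record and submit the workload NUM_SUBMITS times; each submission re-clears the payload and
// guard resources and runs the shader 50 times. The result buffer (buffers[2]) is zeroed once
// up front and copied back to host-visible memory on the last submit.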
1851 const uint32_t NUM_SUBMITS = 4;
1852
1853 for (uint32_t x = 0; x < NUM_SUBMITS; ++x)
1854 {
1855 beginCommandBuffer(vk, *cmdBuffer, 0u);
1856
1857 if (x == 0)
1858 vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1859
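// Transition any images used by the test from UNDEFINED to GENERAL before they are cleared
// and accessed by the shaders.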
1860 for (uint32_t i = 0; i < 2; ++i)
1861 {
1862 if (!images[i])
1863 continue;
1864
1865 const VkImageMemoryBarrier imageBarrier = {
1866 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
1867 DE_NULL, // const void* pNext
1868 0u, // VkAccessFlags srcAccessMask
1869 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask
1870 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout
1871 VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
1872 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
1873 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
1874 **images[i], // VkImage image
1875 {
1876 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask
1877 0u, // uint32_t baseMipLevel
1878 1u, // uint32_t mipLevels,
1879 0u, // uint32_t baseArray
1880 1u, // uint32_t arraySize
1881 }};
1882
1883 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1884 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 0,
1885 (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrier);
1886 }
1887
1888 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1889 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1890
1891 if (m_data.payloadSC == SC_PHYSBUFFER)
1892 {
1893 addrInfo.buffer = **buffers[0];
1894 VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
1895 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, 0, sizeof(VkDeviceSize), &addr);
1896 }
1897 if (m_data.guardSC == SC_PHYSBUFFER)
1898 {
1899 addrInfo.buffer = **buffers[1];
1900 VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
1901 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, 8, sizeof(VkDeviceSize), &addr);
1902 }
1903
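// Each iteration: clear the payload and guard, make the clears visible to the shader stages,
// run one dispatch or draw, then make the shader writes available to the next transfer.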
1904 for (uint32_t iters = 0; iters < 50; ++iters)
1905 {
1906 for (uint32_t i = 0; i < 2; ++i)
1907 {
1908 if (buffers[i])
1909 vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1910 if (images[i])
1911 vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1,
1912 &range);
1913 }
1914
1915 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1916 memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1917 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, 0, 1, &memBarrier, 0,
1918 DE_NULL, 0, DE_NULL);
1919
1920 if (m_data.stage == STAGE_COMPUTE)
1921 {
1922 vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1923 }
1924 else
1925 {
1926 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1927 makeRect2D(DIM * NUM_WORKGROUP_EACH_DIM, DIM * NUM_WORKGROUP_EACH_DIM), 0, DE_NULL,
1928 VK_SUBPASS_CONTENTS_INLINE);
1929 // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1930 if (m_data.stage == STAGE_VERTEX)
1931 {
1932 vk.cmdDraw(*cmdBuffer, DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1933 }
1934 else
1935 {
1936 vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1937 }
1938 endRenderPass(vk, *cmdBuffer);
1939 }
1940
1941 memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1942 memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1943 vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &memBarrier, 0,
1944 DE_NULL, 0, DE_NULL);
1945 }
1946
1947 if (x == NUM_SUBMITS - 1)
1948 {
1949 vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1950 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1951 memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
1952 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1,
1953 &memBarrier, 0, DE_NULL, 0, DE_NULL);
1954 }
1955
1956 endCommandBuffer(vk, *cmdBuffer);
1957
1958 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1959
1960 m_context.resetCommandPoolForVKSC(device, *cmdPool);
1961 }
1962
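// A nonzero value in an invocation's slot of the result buffer marks a failure; read the
// buffer back and log up to 256 failing invocations.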
1963 tcu::TestLog &log = m_context.getTestContext().getLog();
1964
1965 uint32_t *ptr = (uint32_t *)copyBuffer->getAllocation().getHostPtr();
1966 invalidateAlloc(vk, device, copyBuffer->getAllocation());
1967 qpTestResult res = QP_TEST_RESULT_PASS;
1968
1969 uint32_t numErrors = 0;
1970 for (uint32_t i = 0; i < NUM_INVOCATIONS; ++i)
1971 {
1972 if (ptr[i] != 0)
1973 {
1974 if (numErrors < 256)
1975 {
1976 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1977 }
1978 numErrors++;
1979 res = QP_TEST_RESULT_FAIL;
1980 }
1981 }
1982
1983 if (numErrors)
1984 {
1985 log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1986 }
1987
1988 return tcu::TestStatus(res, qpGetTestResultName(res));
1989 }
1990
1991 #ifndef CTS_USES_VULKANSC
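// Amber-based "permuted index" message passing tests. The support check below rejects devices
// whose compute limits in X (workgroup count, workgroup size, invocations) are below 256.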
1992 void checkPermutedIndexTestSupport(Context &context, std::string testName)
1993 {
1994 DE_UNREF(testName);
1995
1996 const auto maxComputeWorkGroupCount = context.getDeviceProperties().limits.maxComputeWorkGroupCount;
1997 const auto maxComputeWorkGroupSize = context.getDeviceProperties().limits.maxComputeWorkGroupSize;
1998 const auto maxComputeWorkGroupInvocations = context.getDeviceProperties().limits.maxComputeWorkGroupInvocations;
1999
2000 if (maxComputeWorkGroupCount[0] < 256u)
2001 TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupCount.x");
2002
2003 if (maxComputeWorkGroupSize[0] < 256u)
2004 TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupSize.x");
2005
2006 if (maxComputeWorkGroupInvocations < 256u)
2007 TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupInvocations");
2008 }
2009
2010 tcu::TestCaseGroup *createPermutedIndexTests(tcu::TestContext &testCtx)
2011 {
2012 de::MovePtr<tcu::TestCaseGroup> permutedIndex(new tcu::TestCaseGroup(testCtx, "permuted_index"));
2013 static const char dataDir[] = "memory_model/message_passing/permuted_index";
2014 static const std::string cases[] = {"barrier", "release_acquire", "release_acquire_atomic_payload"};
2015
2016 for (const auto &test : cases)
2017 {
2018 cts_amber::AmberTestCase *testCase =
2019 cts_amber::createAmberTestCase(testCtx, test.c_str(), dataDir, (test + ".amber").c_str());
2020 testCase->setCheckSupportCallback(checkPermutedIndexTestSupport);
2021
2022 permutedIndex->addChild(testCase);
2023 }
2024
2025 return permutedIndex.release();
2026 }
2027 #endif // CTS_USES_VULKANSC
2028
2029 } // namespace
2030
2031 tcu::TestCaseGroup *createTests(tcu::TestContext &testCtx, const std::string &name)
2032 {
2033 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str()));
2034
2035 typedef struct
2036 {
2037 uint32_t value;
2038 const char *name;
2039 } TestGroupCase;
2040
2041 TestGroupCase ttCases[] = {
2042 {TT_MP, "message_passing"},
2043 {TT_WAR, "write_after_read"},
2044 };
2045
2046 TestGroupCase core11Cases[] = {
2047 // Supported by Vulkan 1.1
2048 {1, "core11"},
2049 // Requires VK_KHR_vulkan_memory_model extension
2050 {0, "ext"},
2051 };
2052
2053 TestGroupCase dtCases[] = {
2054 // uint32_t atomics
2055 {DATA_TYPE_UINT, "u32"},
2056 // uint64_t atomics
2057 {DATA_TYPE_UINT64, "u64"},
2058 // float32 atomics
2059 {DATA_TYPE_FLOAT32, "f32"},
2060 // float64 atomics
2061 {DATA_TYPE_FLOAT64, "f64"},
2062 };
2063
2064 TestGroupCase cohCases[] = {
2065 // coherent payload variable
2066 {1, "coherent"},
2067 // noncoherent payload variable
2068 {0, "noncoherent"},
2069 };
2070
2071 TestGroupCase stCases[] = {
2072 // release fence, acquire fence
2073 {ST_FENCE_FENCE, "fence_fence"},
2074 // release fence, atomic acquire
2075 {ST_FENCE_ATOMIC, "fence_atomic"},
2076 // atomic release, acquire fence
2077 {ST_ATOMIC_FENCE, "atomic_fence"},
2078 // atomic release, atomic acquire
2079 {ST_ATOMIC_ATOMIC, "atomic_atomic"},
2080 // control barrier
2081 {ST_CONTROL_BARRIER, "control_barrier"},
2082 // control barrier with release/acquire
2083 {ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier"},
2084 };
2085
2086 TestGroupCase rmwCases[] = {
2087 {0, "atomicwrite"},
2088 {1, "atomicrmw"},
2089 };
2090
2091 TestGroupCase scopeCases[] = {
2092 {SCOPE_DEVICE, "device"},
2093 {SCOPE_QUEUEFAMILY, "queuefamily"},
2094 {SCOPE_WORKGROUP, "workgroup"},
2095 {SCOPE_SUBGROUP, "subgroup"},
2096 };
2097
2098 TestGroupCase plCases[] = {
2099 // payload variable in non-local memory
2100 {0, "payload_nonlocal"},
2101 // payload variable in local memory
2102 {1, "payload_local"},
2103 };
2104
2105 TestGroupCase pscCases[] = {
2106 // payload variable in buffer memory
2107 {SC_BUFFER, "buffer"},
2108 // payload variable in image memory
2109 {SC_IMAGE, "image"},
2110 // payload variable in workgroup memory
2111 {SC_WORKGROUP, "workgroup"},
2112 // payload variable in physical storage buffer memory
2113 {SC_PHYSBUFFER, "physbuffer"},
2114 };
2115
2116 TestGroupCase glCases[] = {
2117 // guard variable in non-local memory
2118 {0, "guard_nonlocal"},
2119 // guard variable in local memory
2120 {1, "guard_local"},
2121 };
2122
2123 TestGroupCase gscCases[] = {
2124 // guard variable in buffer memory
2125 {SC_BUFFER, "buffer"},
2126 // guard variable in image memory
2127 {SC_IMAGE, "image"},
2128 // guard variable in workgroup memory
2129 {SC_WORKGROUP, "workgroup"},
2130 // guard variable in physical storage buffer memory
2131 {SC_PHYSBUFFER, "physbuffer"},
2132 };
2133
2134 TestGroupCase stageCases[] = {
2135 {STAGE_COMPUTE, "comp"},
2136 {STAGE_VERTEX, "vert"},
2137 {STAGE_FRAGMENT, "frag"},
2138 };
2139
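// Build the main test hierarchy: test type / core11-vs-ext / data type / coherency / sync type /
// atomic write-vs-RMW / scope / payload locality / payload storage class / guard locality /
// guard storage class / shader stage. Unsupported or redundant combinations are filtered below.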
2140 for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
2141 {
2142 de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name));
2143
2144 #ifndef CTS_USES_VULKANSC
2145 // Permuted index tests for message passing.
2146 if (ttCases[ttNdx].value == TT_MP)
2147 ttGroup->addChild(createPermutedIndexTests(testCtx));
2148 #endif // CTS_USES_VULKANSC
2149
2150 for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
2151 {
2152 de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name));
2153 for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
2154 {
2155 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name));
2156 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2157 {
2158 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
2159 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2160 {
2161 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
2162 for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
2163 {
2164 de::MovePtr<tcu::TestCaseGroup> rmwGroup(
2165 new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name));
2166 for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
2167 {
2168 de::MovePtr<tcu::TestCaseGroup> scopeGroup(
2169 new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name));
2170 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2171 {
2172 de::MovePtr<tcu::TestCaseGroup> plGroup(
2173 new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
2174 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2175 {
2176 de::MovePtr<tcu::TestCaseGroup> pscGroup(
2177 new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
2178 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2179 {
2180 de::MovePtr<tcu::TestCaseGroup> glGroup(
2181 new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
2182 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2183 {
2184 de::MovePtr<tcu::TestCaseGroup> gscGroup(
2185 new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
2186 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases);
2187 stageNdx++)
2188 {
2189 CaseDef c = {
2190 !!plCases[plNdx].value, // bool payloadMemLocal;
2191 !!glCases[glNdx].value, // bool guardMemLocal;
2192 !!cohCases[cohNdx].value, // bool coherent;
2193 !!core11Cases[core11Ndx].value, // bool core11;
2194 !!rmwCases[rmwNdx].value, // bool atomicRMW;
2195 (TestType)ttCases[ttNdx].value, // TestType testType;
2196 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
2197 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
2198 (Scope)scopeCases[scopeNdx].value, // Scope scope;
2199 (SyncType)stCases[stNdx].value, // SyncType syncType;
2200 (Stage)stageCases[stageNdx].value, // Stage stage;
2201 (DataType)dtCases[dtNdx].value, // DataType dataType;
2202 false, // bool transitive;
2203 false, // bool transitiveVis;
2204 };
2205
2206 // Mustpass11 tests should only exercise things we expect to work on
2207 // existing implementations. Exclude noncoherent tests which require
2208 // new extensions, and assume atomic synchronization wouldn't work
2209 // (i.e. atomics may be implemented as relaxed atomics). Exclude
2210 // queuefamily scope which doesn't exist in Vulkan 1.1. Exclude
2211 // physical storage buffer which doesn't support the legacy decorations.
2212 if (c.core11 &&
2213 (c.coherent == 0 || c.syncType == ST_FENCE_ATOMIC ||
2214 c.syncType == ST_ATOMIC_FENCE ||
2215 c.syncType == ST_ATOMIC_ATOMIC ||
2216 c.dataType == DATA_TYPE_UINT64 ||
2217 c.dataType == DATA_TYPE_FLOAT64 ||
2218 c.scope == SCOPE_QUEUEFAMILY || c.payloadSC == SC_PHYSBUFFER ||
2219 c.guardSC == SC_PHYSBUFFER))
2220 {
2221 continue;
2222 }
2223
2224 if (c.stage != STAGE_COMPUTE && c.scope == SCOPE_WORKGROUP)
2225 {
2226 continue;
2227 }
2228
2229 // Don't exercise local and non-local for workgroup memory
2230 // Also don't exercise workgroup memory for non-compute stages
2231 if (c.payloadSC == SC_WORKGROUP &&
2232 (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
2233 {
2234 continue;
2235 }
2236 if (c.guardSC == SC_WORKGROUP &&
2237 (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
2238 {
2239 continue;
2240 }
2241 // Can't do control barrier with larger than workgroup scope, or non-compute stages
2242 if ((c.syncType == ST_CONTROL_BARRIER ||
2243 c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
2244 (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY ||
2245 c.stage != STAGE_COMPUTE))
2246 {
2247 continue;
2248 }
2249
2250 // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
2251 if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
2252 {
2253 continue;
2254 }
2255
2256 // uint64/float32/float64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
2257 const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 ||
2258 c.dataType == DATA_TYPE_FLOAT32 ||
2259 c.dataType == DATA_TYPE_FLOAT64);
2260 if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC)
2261 {
2262 continue;
2263 }
2264
2265 // No 64-bit image types, so skip tests with both payload and guard in image memory
2266 if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE &&
2267 c.guardSC == SC_IMAGE)
2268 {
2269 continue;
2270 }
2271
2272 // No support for atomic operations on 64-bit floating point images
2273 if (c.dataType == DATA_TYPE_FLOAT64 &&
2274 (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE))
2275 {
2276 continue;
2277 }
2278 // Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
2279 if ((c.syncType == ST_CONTROL_BARRIER ||
2280 c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
2281 (c.guardSC != 0 || c.guardMemLocal != 0))
2282 {
2283 continue;
2284 }
2285
2286 gscGroup->addChild(
2287 new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, c));
2288 }
2289 glGroup->addChild(gscGroup.release());
2290 }
2291 pscGroup->addChild(glGroup.release());
2292 }
2293 plGroup->addChild(pscGroup.release());
2294 }
2295 scopeGroup->addChild(plGroup.release());
2296 }
2297 rmwGroup->addChild(scopeGroup.release());
2298 }
2299 stGroup->addChild(rmwGroup.release());
2300 }
2301 cohGroup->addChild(stGroup.release());
2302 }
2303 dtGroup->addChild(cohGroup.release());
2304 }
2305 core11Group->addChild(dtGroup.release());
2306 }
2307 ttGroup->addChild(core11Group.release());
2308 }
2309 group->addChild(ttGroup.release());
2310 }
2311
2312 TestGroupCase transVisCases[] = {
2313 // destination invocation acquires
2314 {0, "nontransvis"},
2315 // invocation 0,0 acquires
2316 {1, "transvis"},
2317 };
2318
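// Transitive tests: message passing where visibility must be established transitively through
// an intermediate invocation. These always use the compute stage, device scope, and u32 data,
// with either the destination invocation or invocation (0,0) performing the acquire.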
2319 de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive"));
2320 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2321 {
2322 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
2323 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2324 {
2325 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
2326 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2327 {
2328 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
2329 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2330 {
2331 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
2332 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2333 {
2334 de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
2335 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2336 {
2337 de::MovePtr<tcu::TestCaseGroup> gscGroup(
2338 new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
2339 for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++)
2340 {
2341 CaseDef c = {
2342 !!plCases[plNdx].value, // bool payloadMemLocal;
2343 !!glCases[glNdx].value, // bool guardMemLocal;
2344 !!cohCases[cohNdx].value, // bool coherent;
2345 false, // bool core11;
2346 false, // bool atomicRMW;
2347 TT_MP, // TestType testType;
2348 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
2349 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
2350 SCOPE_DEVICE, // Scope scope;
2351 (SyncType)stCases[stNdx].value, // SyncType syncType;
2352 STAGE_COMPUTE, // Stage stage;
2353 DATA_TYPE_UINT, // DataType dataType;
2354 true, // bool transitive;
2355 !!transVisCases[visNdx].value, // bool transitiveVis;
2356 };
2357 if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP)
2358 {
2359 continue;
2360 }
2361 if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
2362 {
2363 continue;
2364 }
2365 gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, c));
2366 }
2367 glGroup->addChild(gscGroup.release());
2368 }
2369 pscGroup->addChild(glGroup.release());
2370 }
2371 plGroup->addChild(pscGroup.release());
2372 }
2373 stGroup->addChild(plGroup.release());
2374 }
2375 cohGroup->addChild(stGroup.release());
2376 }
2377 transGroup->addChild(cohGroup.release());
2378 }
2379 group->addChild(transGroup.release());
2380
2381 // Padding tests.
2382 group->addChild(createPaddingTests(testCtx));
2383 // Shared memory layout tests.
2384 group->addChild(createSharedMemoryLayoutTests(testCtx));
2385
2386 return group.release();
2387 }
2388
2389 } // namespace MemoryModel
2390 } // namespace vkt
2391