1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017-2019 The Khronos Group Inc.
6  * Copyright (c) 2018-2019 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Tests for VK_EXT_buffer_device_address.
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktBindingBufferDeviceAddressTests.hpp"
26 
27 #include "vkBufferWithMemory.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkQueryUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkObjUtil.hpp"
34 
35 #include "vktTestGroupUtil.hpp"
36 #include "vktTestCase.hpp"
37 
38 #include "deDefs.h"
39 #include "deMath.h"
40 #include "deRandom.h"
41 #include "deRandom.hpp"
42 #include "deSharedPtr.hpp"
43 #include "deString.h"
44 
45 #include "tcuTestCase.hpp"
46 #include "tcuTestLog.hpp"
47 
48 #include <string>
49 #include <sstream>
50 
51 namespace vkt
52 {
53 namespace BindingModel
54 {
55 namespace
56 {
57 using namespace vk;
58 using namespace std;
59 
60 typedef de::MovePtr<Unique<VkBuffer>> VkBufferSp;
61 typedef de::MovePtr<Allocation> AllocationSp;
62 
// Row length the vertex-stage shader uses to turn gl_VertexIndex into 2D image coordinates.
constexpr uint32_t DIM = 8u;
64 
// Kind of descriptor-bound block at the root of the pointer chain: uniform buffer or storage buffer.
enum Base
{
    BASE_UBO  = 0,
    BASE_SSBO = 1,
};
70 
// Compile-time switch guarding the VK_NV_ray_tracing (raygen) code paths; currently off.
#define ENABLE_RAYTRACING 0

// Shader stage in which the generated checks are executed.
enum Stage
{
    STAGE_COMPUTE  = 0,
    STAGE_VERTEX   = 1,
    STAGE_FRAGMENT = 2,
    STAGE_RAYGEN   = 3,
};
80 
// Buffer arrangement: one shared buffer, one buffer per binding, or per-binding buffers created
// with the device-address capture/replay flag.
enum BufType
{
    BT_SINGLE = 0,
    BT_MULTI  = 1,
    BT_REPLAY = 2,
};
87 
// Block layout qualifier applied to the generated GLSL blocks.
enum Layout
{
    LAYOUT_STD140 = 0,
    LAYOUT_SCALAR = 1,
};
93 
// How buffer references are declared in the blocks and converted/compared in the shader
// (consumed by initPrograms() and checkBuffer()).
enum Convert
{
    CONVERT_NONE       = 0, // members are declared as references (T1) and used directly
    CONVERT_UINT64     = 1, // members declared as uint64_t, cast through uint64_t
    CONVERT_UVEC2      = 2, // members declared as uvec2, cast through uvec2
    CONVERT_U64CMP     = 3, // references are converted to uint64_t and compared
    CONVERT_UVEC2CMP   = 4, // references are converted to uvec2 and compared
    CONVERT_UVEC2TOU64 = 5, // members declared as uvec2, cast through uint64_t
    CONVERT_U64TOUVEC2 = 6, // members declared as uint64_t, cast through uvec2
};
104 
// Whether buffers are bound at offset zero or at a non-zero offset within their memory allocation.
enum MemoryOffset
{
    OFFSET_ZERO    = 0,
    OFFSET_NONZERO = 1,
};
110 
111 struct CaseDef
112 {
113     uint32_t set;
114     uint32_t depth;
115     Base base;
116     Stage stage;
117     Convert convertUToPtr;
118     bool storeInLocal;
119     BufType bufType;
120     Layout layout;
121     MemoryOffset memoryOffset;
122 };
123 
124 class BufferAddressTestInstance : public TestInstance
125 {
126 public:
127     BufferAddressTestInstance(Context &context, const CaseDef &data);
128     ~BufferAddressTestInstance(void);
129     tcu::TestStatus iterate(void);
130     virtual void fillBuffer(const std::vector<uint8_t *> &cpuAddrs, const std::vector<uint64_t> &gpuAddrs,
131                             uint32_t bufNum, uint32_t curDepth) const;
132 
133 private:
134     CaseDef m_data;
135 
136     enum
137     {
138         WIDTH  = 256,
139         HEIGHT = 256
140     };
141 };
142 
BufferAddressTestInstance(Context & context,const CaseDef & data)143 BufferAddressTestInstance::BufferAddressTestInstance(Context &context, const CaseDef &data)
144     : vkt::TestInstance(context)
145     , m_data(data)
146 {
147 }
148 
~BufferAddressTestInstance(void)149 BufferAddressTestInstance::~BufferAddressTestInstance(void)
150 {
151 }
152 
153 class BufferAddressTestCase : public TestCase
154 {
155 public:
156     BufferAddressTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
157     ~BufferAddressTestCase(void);
158     virtual void initPrograms(SourceCollections &programCollection) const;
159     virtual TestInstance *createInstance(Context &context) const;
160     virtual void checkSupport(Context &context) const;
161     virtual void checkBuffer(std::stringstream &checks, uint32_t bufNum, uint32_t curDepth,
162                              const std::string &prefix) const;
163 
164 private:
165     CaseDef m_data;
166 };
167 
BufferAddressTestCase(tcu::TestContext & context,const char * name,const CaseDef data)168 BufferAddressTestCase::BufferAddressTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
169     : vkt::TestCase(context, name)
170     , m_data(data)
171 {
172 }
173 
~BufferAddressTestCase(void)174 BufferAddressTestCase::~BufferAddressTestCase(void)
175 {
176 }
177 
checkSupport(Context & context) const178 void BufferAddressTestCase::checkSupport(Context &context) const
179 {
180     if (!context.isBufferDeviceAddressSupported())
181         TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
182 
183     if (m_data.stage == STAGE_VERTEX && !context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
184         TCU_THROW(NotSupportedError, "Vertex pipeline stores and atomics not supported");
185 
186     if (m_data.set >= context.getDeviceProperties().limits.maxBoundDescriptorSets)
187         TCU_THROW(NotSupportedError, "descriptor set number not supported");
188 
189 #ifndef CTS_USES_VULKANSC
190     bool isBufferDeviceAddressWithCaptureReplaySupported =
191         (context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") &&
192          context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay) ||
193         (context.isDeviceFunctionalitySupported("VK_EXT_buffer_device_address") &&
194          context.getBufferDeviceAddressFeaturesEXT().bufferDeviceAddressCaptureReplay);
195 #else
196     bool isBufferDeviceAddressWithCaptureReplaySupported =
197         (context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") &&
198          context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay);
199 #endif
200 
201     if (m_data.bufType == BT_REPLAY && !isBufferDeviceAddressWithCaptureReplaySupported)
202         TCU_THROW(NotSupportedError, "Capture/replay of physical storage buffer pointers not supported");
203 
204     if (m_data.layout == LAYOUT_SCALAR && !context.getScalarBlockLayoutFeatures().scalarBlockLayout)
205         TCU_THROW(NotSupportedError, "Scalar block layout not supported");
206 
207 #if ENABLE_RAYTRACING
208     if (m_data.stage == STAGE_RAYGEN && !context.isDeviceFunctionalitySupported("VK_NV_ray_tracing"))
209     {
210         TCU_THROW(NotSupportedError, "Ray tracing not supported");
211     }
212 #endif
213 
214     const bool needsInt64 = (m_data.convertUToPtr == CONVERT_UINT64 || m_data.convertUToPtr == CONVERT_U64CMP ||
215                              m_data.convertUToPtr == CONVERT_U64TOUVEC2 || m_data.convertUToPtr == CONVERT_UVEC2TOU64);
216 
217     const bool needsKHR = (m_data.convertUToPtr == CONVERT_UVEC2 || m_data.convertUToPtr == CONVERT_UVEC2CMP ||
218                            m_data.convertUToPtr == CONVERT_U64TOUVEC2 || m_data.convertUToPtr == CONVERT_UVEC2TOU64);
219 
220     if (needsInt64 && !context.getDeviceFeatures().shaderInt64)
221         TCU_THROW(NotSupportedError, "Int64 not supported");
222     if (needsKHR && !context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
223         TCU_THROW(NotSupportedError, "VK_KHR_buffer_device_address not supported");
224 }
225 
checkBuffer(std::stringstream & checks,uint32_t bufNum,uint32_t curDepth,const std::string & prefix) const226 void BufferAddressTestCase::checkBuffer(std::stringstream &checks, uint32_t bufNum, uint32_t curDepth,
227                                         const std::string &prefix) const
228 {
229     string newPrefix = prefix;
230     if (curDepth > 0)
231     {
232         if (m_data.convertUToPtr == CONVERT_UINT64 || m_data.convertUToPtr == CONVERT_UVEC2TOU64)
233             newPrefix = "T1(uint64_t(T1(" + newPrefix + ")))";
234         else if (m_data.convertUToPtr == CONVERT_UVEC2 || m_data.convertUToPtr == CONVERT_U64TOUVEC2)
235             newPrefix = "T1(uvec2(T1(" + newPrefix + ")))";
236     }
237 
238     if (m_data.storeInLocal && curDepth != 0)
239     {
240         std::string localName = "l" + de::toString(bufNum);
241         checks << "   " << ((bufNum & 1) ? "restrict " : "") << "T1 " << localName << " = " << newPrefix << ";\n";
242         newPrefix = localName;
243     }
244 
245     checks << "   accum |= " << newPrefix << ".a[0] - " << bufNum * 3 + 0 << ";\n";
246     checks << "   accum |= " << newPrefix << ".a[pc.identity[1]] - " << bufNum * 3 + 1 << ";\n";
247     checks << "   accum |= " << newPrefix << ".b - " << bufNum * 3 + 2 << ";\n";
248     checks << "   accum |= int(" << newPrefix << ".e[0][0] - " << bufNum * 3 + 3 << ");\n";
249     checks << "   accum |= int(" << newPrefix << ".e[0][1] - " << bufNum * 3 + 5 << ");\n";
250     checks << "   accum |= int(" << newPrefix << ".e[1][0] - " << bufNum * 3 + 4 << ");\n";
251     checks << "   accum |= int(" << newPrefix << ".e[1][1] - " << bufNum * 3 + 6 << ");\n";
252 
253     if (m_data.layout == LAYOUT_SCALAR)
254     {
255         checks << "   f = " << newPrefix << ".f;\n";
256         checks << "   accum |= f.x - " << bufNum * 3 + 7 << ";\n";
257         checks << "   accum |= f.y - " << bufNum * 3 + 8 << ";\n";
258         checks << "   accum |= f.z - " << bufNum * 3 + 9 << ";\n";
259     }
260 
261     const std::string localPrefix = "l" + de::toString(bufNum);
262 
263     if (m_data.convertUToPtr == CONVERT_U64CMP || m_data.convertUToPtr == CONVERT_UVEC2CMP)
264     {
265         const std::string type = ((m_data.convertUToPtr == CONVERT_U64CMP) ? "uint64_t" : "uvec2");
266 
267         checks << "   " << type << " " << localPrefix << "c0 = " << type << "(" << newPrefix << ".c[0]);\n";
268         checks << "   " << type << " " << localPrefix << "c1 = " << type << "(" << newPrefix
269                << ".c[pc.identity[1]]);\n";
270         checks << "   " << type << " " << localPrefix << "d  = " << type << "(" << newPrefix << ".d);\n";
271     }
272 
273     if (curDepth != m_data.depth)
274     {
275         // Check non-null pointers and inequality among them.
276         if (m_data.convertUToPtr == CONVERT_U64CMP)
277         {
278             checks << "   if (" << localPrefix << "c0 == zero ||\n"
279                    << "       " << localPrefix << "c1 == zero ||\n"
280                    << "       " << localPrefix << "d  == zero ||\n"
281                    << "       " << localPrefix << "c0 == " << localPrefix << "c1 ||\n"
282                    << "       " << localPrefix << "c1 == " << localPrefix << "d  ||\n"
283                    << "       " << localPrefix << "c0 == " << localPrefix << "d  ) {\n"
284                    << "     accum |= 1;\n"
285                    << "   }\n";
286         }
287         else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
288         {
289             checks << "   if (all(equal(" << localPrefix << "c0, zero)) ||\n"
290                    << "       all(equal(" << localPrefix << "c1, zero)) ||\n"
291                    << "       all(equal(" << localPrefix << "d , zero)) ||\n"
292                    << "       all(equal(" << localPrefix << "c0, " << localPrefix << "c1)) ||\n"
293                    << "       all(equal(" << localPrefix << "c1, " << localPrefix << "d )) ||\n"
294                    << "       all(equal(" << localPrefix << "c0, " << localPrefix << "d )) ) {\n"
295                    << "     accum |= 1;\n"
296                    << "   }\n";
297         }
298 
299         checkBuffer(checks, bufNum * 3 + 1, curDepth + 1, newPrefix + ".c[0]");
300         checkBuffer(checks, bufNum * 3 + 2, curDepth + 1, newPrefix + ".c[pc.identity[1]]");
301         checkBuffer(checks, bufNum * 3 + 3, curDepth + 1, newPrefix + ".d");
302     }
303     else
304     {
305         // Check null pointers nonexplicitly.
306         if (m_data.convertUToPtr == CONVERT_U64CMP)
307         {
308             checks << "   if (!(" << localPrefix << "c0 == " << localPrefix << "c1 &&\n"
309                    << "         " << localPrefix << "c1 == " << localPrefix << "d  &&\n"
310                    << "         " << localPrefix << "c0 == " << localPrefix << "d  )) {\n"
311                    << "     accum |= 1;\n"
312                    << "   }\n";
313         }
314         else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
315         {
316             checks << "   if (!(all(equal(" << localPrefix << "c0, " << localPrefix << "c1)) &&\n"
317                    << "         all(equal(" << localPrefix << "c1, " << localPrefix << "d )) &&\n"
318                    << "         all(equal(" << localPrefix << "c0, " << localPrefix << "d )) )) {\n"
319                    << "     accum |= 1;\n"
320                    << "   }\n";
321         }
322     }
323 }
324 
fillBuffer(const std::vector<uint8_t * > & cpuAddrs,const std::vector<uint64_t> & gpuAddrs,uint32_t bufNum,uint32_t curDepth) const325 void BufferAddressTestInstance::fillBuffer(const std::vector<uint8_t *> &cpuAddrs,
326                                            const std::vector<uint64_t> &gpuAddrs, uint32_t bufNum,
327                                            uint32_t curDepth) const
328 {
329     uint8_t *buf = cpuAddrs[bufNum];
330 
331     uint32_t aStride   = m_data.layout == LAYOUT_SCALAR ? 1 : 4; // (in deUint32s)
332     uint32_t cStride   = m_data.layout == LAYOUT_SCALAR ? 1 : 2; // (in deUint64s)
333     uint32_t matStride = m_data.layout == LAYOUT_SCALAR ? 2 : 4; // (in floats)
334 
335     // a
336     ((uint32_t *)(buf + 0))[0]       = bufNum * 3 + 0;
337     ((uint32_t *)(buf + 0))[aStride] = bufNum * 3 + 1;
338     // b
339     ((uint32_t *)(buf + 32))[0] = bufNum * 3 + 2;
340     if (m_data.layout == LAYOUT_SCALAR)
341     {
342         // f
343         ((uint32_t *)(buf + 36))[0] = bufNum * 3 + 7;
344         ((uint32_t *)(buf + 36))[1] = bufNum * 3 + 8;
345         ((uint32_t *)(buf + 36))[2] = bufNum * 3 + 9;
346     }
347     // e
348     ((float *)(buf + 96))[0]             = (float)(bufNum * 3 + 3);
349     ((float *)(buf + 96))[1]             = (float)(bufNum * 3 + 4);
350     ((float *)(buf + 96))[matStride]     = (float)(bufNum * 3 + 5);
351     ((float *)(buf + 96))[matStride + 1] = (float)(bufNum * 3 + 6);
352 
353     if (curDepth != m_data.depth)
354     {
355         // c
356         ((uint64_t *)(buf + 48))[0]       = gpuAddrs[bufNum * 3 + 1];
357         ((uint64_t *)(buf + 48))[cStride] = gpuAddrs[bufNum * 3 + 2];
358         // d
359         ((uint64_t *)(buf + 80))[0] = gpuAddrs[bufNum * 3 + 3];
360 
361         fillBuffer(cpuAddrs, gpuAddrs, bufNum * 3 + 1, curDepth + 1);
362         fillBuffer(cpuAddrs, gpuAddrs, bufNum * 3 + 2, curDepth + 1);
363         fillBuffer(cpuAddrs, gpuAddrs, bufNum * 3 + 3, curDepth + 1);
364     }
365     else
366     {
367         // c
368         ((uint64_t *)(buf + 48))[0]       = 0ull;
369         ((uint64_t *)(buf + 48))[cStride] = 0ull;
370         // d
371         ((uint64_t *)(buf + 80))[0] = 0ull;
372     }
373 }
374 
initPrograms(SourceCollections & programCollection) const375 void BufferAddressTestCase::initPrograms(SourceCollections &programCollection) const
376 {
377     std::stringstream decls, checks, localDecls;
378 
379     std::string baseStorage   = m_data.base == BASE_UBO ? "uniform" : "buffer";
380     std::string memberStorage = "buffer";
381 
382     decls << "layout(r32ui, set = " << m_data.set << ", binding = 0) uniform uimage2D image0_0;\n";
383     decls << "layout(buffer_reference) " << memberStorage << " T1;\n";
384 
385     std::string refType;
386     switch (m_data.convertUToPtr)
387     {
388     case CONVERT_UINT64:
389     case CONVERT_U64TOUVEC2:
390         refType = "uint64_t";
391         break;
392 
393     case CONVERT_UVEC2:
394     case CONVERT_UVEC2TOU64:
395         refType = "uvec2";
396         break;
397 
398     default:
399         refType = "T1";
400         break;
401     }
402 
403     std::string layout = m_data.layout == LAYOUT_SCALAR ? "scalar" : "std140";
404     decls
405         << "layout(set = " << m_data.set << ", binding = 1, " << layout << ") " << baseStorage
406         << " T2 {\n"
407            "   layout(offset = 0) int a[2]; // stride = 4 for scalar, 16 for std140\n"
408            "   layout(offset = 32) int b;\n"
409         << ((m_data.layout == LAYOUT_SCALAR) ? "   layout(offset = 36) ivec3 f;\n" : "") << "   layout(offset = 48) "
410         << refType
411         << " c[2]; // stride = 8 for scalar, 16 for std140\n"
412            "   layout(offset = 80) "
413         << refType
414         << " d;\n"
415            "   layout(offset = 96, row_major) mat2 e; // tightly packed for scalar, 16 byte matrix stride for std140\n"
416            "} x;\n";
417     decls
418         << "layout(buffer_reference, " << layout << ") " << memberStorage
419         << " T1 {\n"
420            "   layout(offset = 0) int a[2]; // stride = 4 for scalar, 16 for std140\n"
421            "   layout(offset = 32) int b;\n"
422         << ((m_data.layout == LAYOUT_SCALAR) ? "   layout(offset = 36) ivec3 f;\n" : "") << "   layout(offset = 48) "
423         << refType
424         << " c[2]; // stride = 8 for scalar, 16 for std140\n"
425            "   layout(offset = 80) "
426         << refType
427         << " d;\n"
428            "   layout(offset = 96, row_major) mat2 e; // tightly packed for scalar, 16 byte matrix stride for std140\n"
429            "};\n";
430 
431     if (m_data.convertUToPtr == CONVERT_U64CMP)
432         localDecls << "  uint64_t zero = uint64_t(0);\n";
433     else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
434         localDecls << "  uvec2 zero = uvec2(0, 0);\n";
435 
436     checkBuffer(checks, 0, 0, "x");
437 
438     std::stringstream pushdecl;
439     pushdecl << "layout (push_constant, std430) uniform Block { int identity[32]; } pc;\n";
440 
441     vk::ShaderBuildOptions::Flags flags = vk::ShaderBuildOptions::Flags(0);
442     if (m_data.layout == LAYOUT_SCALAR)
443         flags = vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS;
444 
445     // The conversion and comparison in uvec2 form test needs SPIR-V 1.5 for OpBitcast.
446     const vk::SpirvVersion spirvVersion =
447         ((m_data.convertUToPtr == CONVERT_UVEC2CMP) ? vk::SPIRV_VERSION_1_5 : vk::SPIRV_VERSION_1_0);
448 
449     switch (m_data.stage)
450     {
451     default:
452         DE_ASSERT(0); // Fallthrough
453     case STAGE_COMPUTE:
454     {
455         std::stringstream css;
456         css << "#version 450 core\n"
457                "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
458                "#extension GL_EXT_buffer_reference : enable\n"
459                "#extension GL_EXT_scalar_block_layout : enable\n"
460                "#extension GL_EXT_buffer_reference_uvec2 : enable\n"
461             << pushdecl.str() << decls.str()
462             << "layout(local_size_x = 1, local_size_y = 1) in;\n"
463                "void main()\n"
464                "{\n"
465                "  int accum = 0, temp;\n"
466                "  ivec3 f;\n"
467             << localDecls.str() << checks.str()
468             << "  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
469                "  imageStore(image0_0, ivec2(gl_GlobalInvocationID.xy), color);\n"
470                "}\n";
471 
472         programCollection.glslSources.add("test")
473             << glu::ComputeSource(css.str())
474             << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
475         break;
476     }
477 #if ENABLE_RAYTRACING
478     case STAGE_RAYGEN:
479     {
480         std::stringstream css;
481         css << "#version 460 core\n"
482                "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
483                "#extension GL_EXT_buffer_reference : enable\n"
484                "#extension GL_EXT_scalar_block_layout : enable\n"
485                "#extension GL_EXT_buffer_reference_uvec2 : enable\n"
486                "#extension GL_NV_ray_tracing : require\n"
487             << pushdecl.str() << decls.str()
488             << "void main()\n"
489                "{\n"
490                "  int accum = 0, temp;\n"
491                "  ivec3 f;\n"
492             << localDecls.str() << checks.str()
493             << "  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
494                "  imageStore(image0_0, ivec2(gl_LaunchIDNV.xy), color);\n"
495                "}\n";
496 
497         programCollection.glslSources.add("test")
498             << glu::RaygenSource(css.str())
499             << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
500         break;
501     }
502 #endif
503     case STAGE_VERTEX:
504     {
505         std::stringstream vss;
506         vss << "#version 450 core\n"
507                "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
508                "#extension GL_EXT_buffer_reference : enable\n"
509                "#extension GL_EXT_scalar_block_layout : enable\n"
510                "#extension GL_EXT_buffer_reference_uvec2 : enable\n"
511             << pushdecl.str() << decls.str()
512             << "void main()\n"
513                "{\n"
514                "  int accum = 0, temp;\n"
515                "  ivec3 f;\n"
516             << localDecls.str() << checks.str()
517             << "  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
518                "  imageStore(image0_0, ivec2(gl_VertexIndex % "
519             << DIM << ", gl_VertexIndex / " << DIM
520             << "), color);\n"
521                "  gl_PointSize = 1.0f;\n"
522                "}\n";
523 
524         programCollection.glslSources.add("test")
525             << glu::VertexSource(vss.str())
526             << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
527         break;
528     }
529     case STAGE_FRAGMENT:
530     {
531         std::stringstream vss;
532         vss << "#version 450 core\n"
533                "void main()\n"
534                "{\n"
535                // full-viewport quad
536                "  gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * "
537                "float(gl_VertexIndex&1), 1);\n"
538                "}\n";
539 
540         programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
541 
542         std::stringstream fss;
543         fss << "#version 450 core\n"
544                "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
545                "#extension GL_EXT_buffer_reference : enable\n"
546                "#extension GL_EXT_scalar_block_layout : enable\n"
547                "#extension GL_EXT_buffer_reference_uvec2 : enable\n"
548             << pushdecl.str() << decls.str()
549             << "void main()\n"
550                "{\n"
551                "  int accum = 0, temp;\n"
552                "  ivec3 f;\n"
553             << localDecls.str() << checks.str()
554             << "  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
555                "  imageStore(image0_0, ivec2(gl_FragCoord.x, gl_FragCoord.y), color);\n"
556                "}\n";
557 
558         programCollection.glslSources.add("test")
559             << glu::FragmentSource(fss.str())
560             << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
561         break;
562     }
563     }
564 }
565 
createInstance(Context & context) const566 TestInstance *BufferAddressTestCase::createInstance(Context &context) const
567 {
568     return new BufferAddressTestInstance(context, m_data);
569 }
570 
makeBufferCreateInfo(const void * pNext,const VkDeviceSize bufferSize,const VkBufferUsageFlags usage,const VkBufferCreateFlags flags)571 VkBufferCreateInfo makeBufferCreateInfo(const void *pNext, const VkDeviceSize bufferSize,
572                                         const VkBufferUsageFlags usage, const VkBufferCreateFlags flags)
573 {
574     const VkBufferCreateInfo bufferCreateInfo = {
575         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
576         pNext,                                // const void* pNext;
577         flags,                                // VkBufferCreateFlags flags;
578         bufferSize,                           // VkDeviceSize size;
579         usage,                                // VkBufferUsageFlags usage;
580         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
581         0u,                                   // uint32_t queueFamilyIndexCount;
582         DE_NULL,                              // const uint32_t* pQueueFamilyIndices;
583     };
584     return bufferCreateInfo;
585 }
586 
iterate(void)587 tcu::TestStatus BufferAddressTestInstance::iterate(void)
588 {
589     const InstanceInterface &vki       = m_context.getInstanceInterface();
590     const DeviceInterface &vk          = m_context.getDeviceInterface();
591     const VkPhysicalDevice &physDevice = m_context.getPhysicalDevice();
592     const VkDevice device              = m_context.getDevice();
593     Allocator &allocator               = m_context.getDefaultAllocator();
594     const bool useKHR                  = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
595 
596     VkFlags allShaderStages   = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
597     VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
598                                 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
599 
600 #if ENABLE_RAYTRACING
601     if (m_data.stage == STAGE_RAYGEN)
602     {
603         allShaderStages   = VK_SHADER_STAGE_RAYGEN_BIT_NV;
604         allPipelineStages = VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV;
605     }
606 #endif
607 
608     VkPhysicalDeviceProperties2 properties;
609     deMemset(&properties, 0, sizeof(properties));
610     properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
611 
612 #if ENABLE_RAYTRACING
613     VkPhysicalDeviceRayTracingPropertiesNV rayTracingProperties;
614     deMemset(&rayTracingProperties, 0, sizeof(rayTracingProperties));
615     rayTracingProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV;
616 
617     if (m_context.isDeviceFunctionalitySupported("VK_NV_ray_tracing"))
618     {
619         properties.pNext = &rayTracingProperties;
620     }
621 #endif
622 
623     m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
624 
625     VkPipelineBindPoint bindPoint;
626 
627     switch (m_data.stage)
628     {
629     case STAGE_COMPUTE:
630         bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
631         break;
632 #if ENABLE_RAYTRACING
633     case STAGE_RAYGEN:
634         bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_NV;
635         break;
636 #endif
637     default:
638         bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
639         break;
640     }
641 
642     Move<vk::VkDescriptorPool> descriptorPool;
643     Move<vk::VkDescriptorSet> descriptorSet;
644 
645     VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
646 
647     VkDescriptorSetLayoutBinding bindings[2];
648     bindings[0] = {
649         0,                                // uint32_t binding;
650         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, // VkDescriptorType descriptorType;
651         1,                                // uint32_t descriptorCount;
652         allShaderStages,                  // VkShaderStageFlags stageFlags;
653         DE_NULL                           // const VkSampler* pImmutableSamplers;
654     };
655     bindings[1] = {
656         1, // uint32_t binding;
657         m_data.base == BASE_UBO ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
658                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // VkDescriptorType descriptorType;
659         1,                                                           // uint32_t descriptorCount;
660         allShaderStages,                                             // VkShaderStageFlags stageFlags;
661         DE_NULL                                                      // const VkSampler* pImmutableSamplers;
662     };
663 
664     // Create a layout and allocate a descriptor set for it.
665     VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo = {vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
666                                                            DE_NULL,
667 
668                                                            0, (uint32_t)2, &bindings[0]};
669 
670     Move<vk::VkDescriptorSetLayout> descriptorSetLayout =
671         vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);
672 
673     setLayoutCreateInfo.bindingCount = 0;
674     Move<vk::VkDescriptorSetLayout> emptyDescriptorSetLayout =
675         vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);
676 
677     vk::DescriptorPoolBuilder poolBuilder;
678     poolBuilder.addType(bindings[1].descriptorType, 1);
679     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1);
680 
681     descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
682     descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
683 
684     VkDeviceSize align = de::max(de::max(properties.properties.limits.minUniformBufferOffsetAlignment,
685                                          properties.properties.limits.minStorageBufferOffsetAlignment),
686                                  (VkDeviceSize)128 /*sizeof(T1)*/);
687 
688     uint32_t numBindings = 1;
689     for (uint32_t d = 0; d < m_data.depth; ++d)
690     {
691         numBindings = numBindings * 3 + 1;
692     }
693 
694 #ifndef CTS_USES_VULKANSC
695     VkBufferDeviceAddressCreateInfoEXT addressCreateInfoEXT = {
696         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT, // VkStructureType  sType;
697         DE_NULL,                                                 // const void*  pNext;
698         0x000000000ULL,                                          // VkDeviceSize         deviceAddress
699     };
700 #endif
701 
702     VkBufferOpaqueCaptureAddressCreateInfo bufferOpaqueCaptureAddressCreateInfo = {
703         VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO, // VkStructureType  sType;
704         DE_NULL,                                                     // const void*  pNext;
705         0x000000000ULL,                                              // VkDeviceSize         opaqueCaptureAddress
706     };
707 
708     std::vector<uint8_t *> cpuAddrs(numBindings);
709     std::vector<VkDeviceAddress> gpuAddrs(numBindings);
710     std::vector<uint64_t> opaqueBufferAddrs(numBindings);
711     std::vector<uint64_t> opaqueMemoryAddrs(numBindings);
712 
713     VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {
714         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType  sType;
715         DE_NULL,                                      // const void*  pNext;
716         0,                                            // VkBuffer             buffer
717     };
718 
719     VkDeviceMemoryOpaqueCaptureAddressInfo deviceMemoryOpaqueCaptureAddressInfo = {
720         VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO, // VkStructureType  sType;
721         DE_NULL,                                                     // const void*  pNext;
722         0,                                                           // VkDeviceMemory  memory;
723     };
724 
725     bool multiBuffer          = m_data.bufType != BT_SINGLE;
726     bool offsetNonZero        = m_data.memoryOffset == OFFSET_NONZERO;
727     uint32_t numBuffers       = multiBuffer ? numBindings : 1;
728     VkDeviceSize bufferSize   = multiBuffer ? align : (align * numBindings);
729     VkDeviceSize memoryOffset = 0;
730 
731     vector<VkBufferSp> buffers(numBuffers);
732     vector<AllocationSp> allocations(numBuffers);
733 
734     VkBufferCreateInfo bufferCreateInfo =
735         makeBufferCreateInfo(DE_NULL, bufferSize,
736                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
737                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
738                              m_data.bufType == BT_REPLAY ? VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT : 0);
739 
740     // VkMemoryAllocateFlags to be filled out later
741     VkMemoryAllocateFlagsInfo allocFlagsInfo = {
742         VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, //    VkStructureType    sType
743         DE_NULL,                                      //    const void*        pNext
744         0,                                            //    VkMemoryAllocateFlags    flags
745         0,                                            //    uint32_t                 deviceMask
746     };
747 
748     VkMemoryOpaqueCaptureAddressAllocateInfo memoryOpaqueCaptureAddressAllocateInfo = {
749         VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO, // VkStructureType    sType;
750         DE_NULL,                                                       // const void*        pNext;
751         0,                                                             // uint64_t           opaqueCaptureAddress;
752     };
753 
754     if (useKHR)
755         allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
756 
757     if (useKHR && m_data.bufType == BT_REPLAY)
758     {
759         allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
760         allocFlagsInfo.pNext = &memoryOpaqueCaptureAddressAllocateInfo;
761     }
762 
763     for (uint32_t i = 0; i < numBuffers; ++i)
764     {
765         buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
766 
767         // query opaque capture address before binding memory
768         if (useKHR)
769         {
770             bufferDeviceAddressInfo.buffer = **buffers[i];
771             opaqueBufferAddrs[i]           = vk.getBufferOpaqueCaptureAddress(device, &bufferDeviceAddressInfo);
772         }
773 
774         VkMemoryRequirements memReq = getBufferMemoryRequirements(vk, device, **buffers[i]);
775         if (offsetNonZero)
776         {
777             memoryOffset = memReq.alignment;
778             memReq.size += memoryOffset;
779         }
780 
781         allocations[i] = AllocationSp(
782             allocateExtended(vki, vk, physDevice, device, memReq, MemoryRequirement::HostVisible, &allocFlagsInfo));
783 
784         if (useKHR)
785         {
786             deviceMemoryOpaqueCaptureAddressInfo.memory = allocations[i]->getMemory();
787             opaqueMemoryAddrs[i] =
788                 vk.getDeviceMemoryOpaqueCaptureAddress(device, &deviceMemoryOpaqueCaptureAddressInfo);
789         }
790 
791         VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), memoryOffset));
792     }
793 
794     if (m_data.bufType == BT_REPLAY)
795     {
796         for (uint32_t i = 0; i < numBuffers; ++i)
797         {
798             bufferDeviceAddressInfo.buffer = **buffers[i];
799             gpuAddrs[i]                    = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
800         }
801         buffers.clear();
802         buffers.resize(numBuffers);
803         allocations.clear();
804         allocations.resize(numBuffers);
805 
806 #ifndef CTS_USES_VULKANSC
807         bufferCreateInfo.pNext = useKHR ? (void *)&bufferOpaqueCaptureAddressCreateInfo : (void *)&addressCreateInfoEXT;
808 #else
809         bufferCreateInfo.pNext = (void *)&bufferOpaqueCaptureAddressCreateInfo;
810 #endif
811 
812         for (int32_t i = numBuffers - 1; i >= 0; --i)
813         {
814 #ifndef CTS_USES_VULKANSC
815             addressCreateInfoEXT.deviceAddress = gpuAddrs[i];
816 #endif
817             bufferOpaqueCaptureAddressCreateInfo.opaqueCaptureAddress   = opaqueBufferAddrs[i];
818             memoryOpaqueCaptureAddressAllocateInfo.opaqueCaptureAddress = opaqueMemoryAddrs[i];
819 
820             buffers[i]     = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
821             allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device,
822                                                            getBufferMemoryRequirements(vk, device, **buffers[i]),
823                                                            MemoryRequirement::HostVisible, &allocFlagsInfo));
824             VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));
825 
826             bufferDeviceAddressInfo.buffer = **buffers[i];
827             VkDeviceSize newAddr           = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
828 
829             if (newAddr != gpuAddrs[i])
830                 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "address mismatch");
831         }
832     }
833 
    // Compute the CPU and GPU address of each binding. With a single shared buffer,
    // binding i is located "align * i" bytes past the start of that buffer.
835     for (uint32_t i = 0; i < numBindings; ++i)
836     {
837         bufferDeviceAddressInfo.buffer = **buffers[multiBuffer ? i : 0];
838         gpuAddrs[i]                    = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
839 
840         cpuAddrs[i] = (uint8_t *)allocations[multiBuffer ? i : 0]->getHostPtr() + memoryOffset;
841         if (!multiBuffer)
842         {
843             cpuAddrs[i] = cpuAddrs[i] + align * i;
844             gpuAddrs[i] = gpuAddrs[i] + align * i;
845         }
846         //printf("addr 0x%08x`%08x\n", (unsigned)(gpuAddrs[i]>>32), (unsigned)(gpuAddrs[i]));
847     }
848 
849     fillBuffer(cpuAddrs, gpuAddrs, 0, 0);
850 
851     for (uint32_t i = 0; i < numBuffers; ++i)
852         flushAlloc(vk, device, *allocations[i]);
853 
854     const VkQueue queue             = m_context.getUniversalQueue();
855     Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
856     Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
857 
858     beginCommandBuffer(vk, *cmdBuffer, 0u);
859 
860     // Push constants are used for dynamic indexing. PushConstant[i] = i.
861 
862     const VkPushConstantRange pushConstRange = {
863         allShaderStages, // VkShaderStageFlags    stageFlags
864         0,               // uint32_t                offset
865         128              // uint32_t                size
866     };
867 
868     uint32_t nonEmptySetLimit = m_data.base == BASE_UBO ?
869                                     properties.properties.limits.maxPerStageDescriptorUniformBuffers :
870                                     properties.properties.limits.maxPerStageDescriptorStorageBuffers;
871     nonEmptySetLimit = de::min(nonEmptySetLimit, properties.properties.limits.maxPerStageDescriptorStorageImages);
872 
873     vector<vk::VkDescriptorSetLayout> descriptorSetLayoutsRaw(m_data.set + 1);
874     for (size_t i = 0; i < m_data.set + 1; ++i)
875     {
876         // use nonempty descriptor sets to consume resources until we run out of descriptors
877         if (i < nonEmptySetLimit - 1 || i == m_data.set)
878             descriptorSetLayoutsRaw[i] = descriptorSetLayout.get();
879         else
880             descriptorSetLayoutsRaw[i] = emptyDescriptorSetLayout.get();
881     }
882 
883     const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
884         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
885         DE_NULL,                                       // pNext
886         (VkPipelineLayoutCreateFlags)0,
887         m_data.set + 1,              // setLayoutCount
888         &descriptorSetLayoutsRaw[0], // pSetLayouts
889         1u,                          // pushConstantRangeCount
890         &pushConstRange,             // pPushConstantRanges
891     };
892 
893     Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
894 
895     // PushConstant[i] = i
896     for (uint32_t i = 0; i < (uint32_t)(128 / sizeof(uint32_t)); ++i)
897     {
898         vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, (uint32_t)(i * sizeof(uint32_t)),
899                             (uint32_t)sizeof(uint32_t), &i);
900     }
901 
902     de::MovePtr<BufferWithMemory> copyBuffer;
903     copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
904         vk, device, allocator,
905         makeBufferCreateInfo(DE_NULL, DIM * DIM * sizeof(uint32_t), VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0),
906         MemoryRequirement::HostVisible));
907 
908     const VkImageCreateInfo imageCreateInfo = {
909         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
910         DE_NULL,                             // const void* pNext;
911         (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
912         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
913         VK_FORMAT_R32_UINT,                  // VkFormat format;
914         {
915             DIM,                 // uint32_t width;
916             DIM,                 // uint32_t height;
917             1u                   // uint32_t depth;
918         },                       // VkExtent3D extent;
919         1u,                      // uint32_t mipLevels;
920         1u,                      // uint32_t arrayLayers;
921         VK_SAMPLE_COUNT_1_BIT,   // VkSampleCountFlagBits samples;
922         VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
923         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
924             VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
925         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
926         0u,                                  // uint32_t queueFamilyIndexCount;
927         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
928         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
929     };
930 
931     VkImageViewCreateInfo imageViewCreateInfo = {
932         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
933         DE_NULL,                                  // const void* pNext;
934         (VkImageViewCreateFlags)0u,               // VkImageViewCreateFlags flags;
935         DE_NULL,                                  // VkImage image;
936         VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
937         VK_FORMAT_R32_UINT,                       // VkFormat format;
938         {
939             VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
940             VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
941             VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
942             VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
943         },                          // VkComponentMapping  components;
944         {
945             VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
946             0u,                        // uint32_t baseMipLevel;
947             1u,                        // uint32_t levelCount;
948             0u,                        // uint32_t baseArrayLayer;
949             1u                         // uint32_t layerCount;
950         }                              // VkImageSubresourceRange subresourceRange;
951     };
952 
953     de::MovePtr<ImageWithMemory> image;
954     Move<VkImageView> imageView;
955 
956     image = de::MovePtr<ImageWithMemory>(
957         new ImageWithMemory(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
958     imageViewCreateInfo.image = **image;
959     imageView                 = createImageView(vk, device, &imageViewCreateInfo, NULL);
960 
961     VkDescriptorImageInfo imageInfo   = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
962     VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffers[0], 0, align);
963 
964     VkWriteDescriptorSet w = {
965         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
966         DE_NULL,                                // pNext
967         *descriptorSet,                         // dstSet
968         (uint32_t)0,                            // dstBinding
969         0,                                      // dstArrayElement
970         1u,                                     // descriptorCount
971         bindings[0].descriptorType,             // descriptorType
972         &imageInfo,                             // pImageInfo
973         &bufferInfo,                            // pBufferInfo
974         DE_NULL,                                // pTexelBufferView
975     };
976     vk.updateDescriptorSets(device, 1, &w, 0, NULL);
977 
978     w.dstBinding     = 1;
979     w.descriptorType = bindings[1].descriptorType;
980     vk.updateDescriptorSets(device, 1, &w, 0, NULL);
981 
982     vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, m_data.set, 1, &descriptorSet.get(), 0, DE_NULL);
983 
984     Move<VkPipeline> pipeline;
985     Move<VkRenderPass> renderPass;
986     Move<VkFramebuffer> framebuffer;
987     de::MovePtr<BufferWithMemory> sbtBuffer;
988 
989     m_context.getTestContext().touchWatchdogAndDisableIntervalTimeLimit();
990 
991     if (m_data.stage == STAGE_COMPUTE)
992     {
993         const Unique<VkShaderModule> shader(
994             createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
995 
996         const VkPipelineShaderStageCreateInfo shaderCreateInfo = {
997             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
998             DE_NULL,
999             (VkPipelineShaderStageCreateFlags)0,
1000             VK_SHADER_STAGE_COMPUTE_BIT, // stage
1001             *shader,                     // shader
1002             "main",
1003             DE_NULL, // pSpecializationInfo
1004         };
1005 
1006         const VkComputePipelineCreateInfo pipelineCreateInfo = {
1007             VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1008             DE_NULL,
1009             0u,                // flags
1010             shaderCreateInfo,  // cs
1011             *pipelineLayout,   // layout
1012             (vk::VkPipeline)0, // basePipelineHandle
1013             0u,                // basePipelineIndex
1014         };
1015         pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1016     }
1017 #if ENABLE_RAYTRACING
1018     else if (m_data.stage == STAGE_RAYGEN)
1019     {
1020         const Unique<VkShaderModule> shader(
1021             createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
1022 
1023         const VkPipelineShaderStageCreateInfo shaderCreateInfo = {
1024             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1025             DE_NULL,
1026             (VkPipelineShaderStageCreateFlags)0,
1027             VK_SHADER_STAGE_RAYGEN_BIT_NV, // stage
1028             *shader,                       // shader
1029             "main",
1030             DE_NULL, // pSpecializationInfo
1031         };
1032 
1033         VkRayTracingShaderGroupCreateInfoNV group = {
1034             VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV,
1035             DE_NULL,
1036             VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, // type
1037             0,                                           // generalShader
1038             VK_SHADER_UNUSED_NV,                         // closestHitShader
1039             VK_SHADER_UNUSED_NV,                         // anyHitShader
1040             VK_SHADER_UNUSED_NV,                         // intersectionShader
1041         };
1042 
1043         VkRayTracingPipelineCreateInfoNV pipelineCreateInfo = {
1044             VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV, // sType
1045             DE_NULL,                                               // pNext
1046             0,                                                     // flags
1047             1,                                                     // stageCount
1048             &shaderCreateInfo,                                     // pStages
1049             1,                                                     // groupCount
1050             &group,                                                // pGroups
1051             0,                                                     // maxRecursionDepth
1052             *pipelineLayout,                                       // layout
1053             (vk::VkPipeline)0,                                     // basePipelineHandle
1054             0u,                                                    // basePipelineIndex
1055         };
1056 
1057         pipeline = createRayTracingPipelineNV(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1058 
1059         sbtBuffer     = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1060             vk, device, allocator,
1061             makeBufferCreateInfo(DE_NULL, rayTracingProperties.shaderGroupHandleSize,
1062                                      VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_RAY_TRACING_BIT_NV, 0),
1063             MemoryRequirement::HostVisible));
1064         uint32_t *ptr = (uint32_t *)sbtBuffer->getAllocation().getHostPtr();
1065         invalidateAlloc(vk, device, sbtBuffer->getAllocation());
1066 
1067         vk.getRayTracingShaderGroupHandlesNV(device, *pipeline, 0, 1, rayTracingProperties.shaderGroupHandleSize, ptr);
1068     }
1069 #endif
1070     else
1071     {
1072 
1073         const vk::VkSubpassDescription subpassDesc = {
1074             (vk::VkSubpassDescriptionFlags)0,
1075             vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
1076             0u,                                  // inputCount
1077             DE_NULL,                             // pInputAttachments
1078             0u,                                  // colorCount
1079             DE_NULL,                             // pColorAttachments
1080             DE_NULL,                             // pResolveAttachments
1081             DE_NULL,                             // depthStencilAttachment
1082             0u,                                  // preserveCount
1083             DE_NULL,                             // pPreserveAttachments
1084         };
1085         const vk::VkRenderPassCreateInfo renderPassParams = {
1086             vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
1087             DE_NULL,                                       // pNext
1088             (vk::VkRenderPassCreateFlags)0,
1089             0u,           // attachmentCount
1090             DE_NULL,      // pAttachments
1091             1u,           // subpassCount
1092             &subpassDesc, // pSubpasses
1093             0u,           // dependencyCount
1094             DE_NULL,      // pDependencies
1095         };
1096 
1097         renderPass = createRenderPass(vk, device, &renderPassParams);
1098 
1099         const vk::VkFramebufferCreateInfo framebufferParams = {
1100             vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
1101             DE_NULL,                                       // pNext
1102             (vk::VkFramebufferCreateFlags)0,
1103             *renderPass, // renderPass
1104             0u,          // attachmentCount
1105             DE_NULL,     // pAttachments
1106             DIM,         // width
1107             DIM,         // height
1108             1u,          // layers
1109         };
1110 
1111         framebuffer = createFramebuffer(vk, device, &framebufferParams);
1112 
1113         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
1114             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1115             DE_NULL,                                                   // const void* pNext;
1116             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
1117             0u,                                                        // uint32_t vertexBindingDescriptionCount;
1118             DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1119             0u,      // uint32_t vertexAttributeDescriptionCount;
1120             DE_NULL  // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1121         };
1122 
1123         const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
1124             VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
1125             DE_NULL,                                                     // const void* pNext;
1126             (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
1127             (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST :
1128                                              VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
1129             VK_FALSE                                                               // VkBool32 primitiveRestartEnable;
1130         };
1131 
1132         const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
1133             VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1134             DE_NULL,                                                    // const void* pNext;
1135             (VkPipelineRasterizationStateCreateFlags)0,          // VkPipelineRasterizationStateCreateFlags flags;
1136             VK_FALSE,                                            // VkBool32 depthClampEnable;
1137             (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
1138             VK_POLYGON_MODE_FILL,                                // VkPolygonMode polygonMode;
1139             VK_CULL_MODE_NONE,                                   // VkCullModeFlags cullMode;
1140             VK_FRONT_FACE_CLOCKWISE,                             // VkFrontFace frontFace;
1141             VK_FALSE,                                            // VkBool32 depthBiasEnable;
1142             0.0f,                                                // float depthBiasConstantFactor;
1143             0.0f,                                                // float depthBiasClamp;
1144             0.0f,                                                // float depthBiasSlopeFactor;
1145             1.0f                                                 // float lineWidth;
1146         };
1147 
1148         const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
1149             VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType                            sType
1150             DE_NULL,               // const void*                                pNext
1151             0u,                    // VkPipelineMultisampleStateCreateFlags    flags
1152             VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits                    rasterizationSamples
1153             VK_FALSE,              // VkBool32                                    sampleShadingEnable
1154             1.0f,                  // float                                    minSampleShading
1155             DE_NULL,               // const VkSampleMask*                        pSampleMask
1156             VK_FALSE,              // VkBool32                                    alphaToCoverageEnable
1157             VK_FALSE               // VkBool32                                    alphaToOneEnable
1158         };
1159 
1160         VkViewport viewport = makeViewport(DIM, DIM);
1161         VkRect2D scissor    = makeRect2D(DIM, DIM);
1162 
1163         const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
1164             VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                            sType
1165             DE_NULL,                                               // const void*                                pNext
1166             (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags        flags
1167             1u,        // uint32_t                                    viewportCount
1168             &viewport, // const VkViewport*                        pViewports
1169             1u,        // uint32_t                                    scissorCount
1170             &scissor   // const VkRect2D*                            pScissors
1171         };
1172 
1173         Move<VkShaderModule> fs;
1174         Move<VkShaderModule> vs;
1175 
1176         uint32_t numStages;
1177         if (m_data.stage == STAGE_VERTEX)
1178         {
1179             vs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1180             fs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1181             numStages = 1u;
1182         }
1183         else
1184         {
1185             vs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1186             fs        = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1187             numStages = 2u;
1188         }
1189 
1190         const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
1191             {
1192                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1193                 VK_SHADER_STAGE_VERTEX_BIT, // stage
1194                 *vs,                        // shader
1195                 "main",
1196                 DE_NULL, // pSpecializationInfo
1197             },
1198             {
1199                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1200                 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
1201                 *fs,                          // shader
1202                 "main",
1203                 DE_NULL, // pSpecializationInfo
1204             }};
1205 
1206         const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
1207             VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
1208             DE_NULL,                                         // const void* pNext;
1209             (VkPipelineCreateFlags)0,                        // VkPipelineCreateFlags flags;
1210             numStages,                                       // uint32_t stageCount;
1211             &shaderCreateInfo[0],                            // const VkPipelineShaderStageCreateInfo* pStages;
1212             &vertexInputStateCreateInfo,   // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
1213             &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
1214             DE_NULL,                       // const VkPipelineTessellationStateCreateInfo* pTessellationState;
1215             &viewportStateCreateInfo,      // const VkPipelineViewportStateCreateInfo* pViewportState;
1216             &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
1217             &multisampleStateCreateInfo,   // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
1218             DE_NULL,                       // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
1219             DE_NULL,                       // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
1220             DE_NULL,                       // const VkPipelineDynamicStateCreateInfo* pDynamicState;
1221             pipelineLayout.get(),          // VkPipelineLayout layout;
1222             renderPass.get(),              // VkRenderPass renderPass;
1223             0u,                            // uint32_t subpass;
1224             DE_NULL,                       // VkPipeline basePipelineHandle;
1225             0                              // int basePipelineIndex;
1226         };
1227 
1228         pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1229     }
1230 
1231     m_context.getTestContext().touchWatchdogAndEnableIntervalTimeLimit();
1232 
1233     const VkImageMemoryBarrier imageBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType        sType
1234                                                DE_NULL,                                // const void*            pNext
1235                                                0u,                           // VkAccessFlags        srcAccessMask
1236                                                VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags        dstAccessMask
1237                                                VK_IMAGE_LAYOUT_UNDEFINED,    // VkImageLayout        oldLayout
1238                                                VK_IMAGE_LAYOUT_GENERAL,      // VkImageLayout        newLayout
1239                                                VK_QUEUE_FAMILY_IGNORED, // uint32_t                srcQueueFamilyIndex
1240                                                VK_QUEUE_FAMILY_IGNORED, // uint32_t                dstQueueFamilyIndex
1241                                                **image,                 // VkImage                image
1242                                                {
1243                                                    VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags    aspectMask
1244                                                    0u,                        // uint32_t                baseMipLevel
1245                                                    1u,                        // uint32_t                mipLevels,
1246                                                    0u,                        // uint32_t                baseArray
1247                                                    1u,                        // uint32_t                arraySize
1248                                                }};
1249 
1250     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1251                           (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 0,
1252                           (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrier);
1253 
1254     vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1255 
1256     VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1257     VkClearValue clearColor       = makeClearValueColorU32(0, 0, 0, 0);
1258 
1259     VkMemoryBarrier memBarrier = {
1260         VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1261         DE_NULL,                          // pNext
1262         0u,                               // srcAccessMask
1263         0u,                               // dstAccessMask
1264     };
1265 
1266     vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1267 
1268     memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1269     memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1270     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, 0, 1, &memBarrier, 0, DE_NULL,
1271                           0, DE_NULL);
1272 
1273     if (m_data.stage == STAGE_COMPUTE)
1274     {
1275         vk.cmdDispatch(*cmdBuffer, DIM, DIM, 1);
1276     }
1277 #if ENABLE_RAYTRACING
1278     else if (m_data.stage == STAGE_RAYGEN)
1279     {
1280         vk.cmdTraceRaysNV(*cmdBuffer, **sbtBuffer, 0, DE_NULL, 0, 0, DE_NULL, 0, 0, DE_NULL, 0, 0, DIM, DIM, 1);
1281     }
1282 #endif
1283     else
1284     {
1285         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(DIM, DIM), 0, DE_NULL,
1286                         VK_SUBPASS_CONTENTS_INLINE);
1287         // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1288         if (m_data.stage == STAGE_VERTEX)
1289         {
1290             vk.cmdDraw(*cmdBuffer, DIM * DIM, 1u, 0u, 0u);
1291         }
1292         else
1293         {
1294             vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1295         }
1296         endRenderPass(vk, *cmdBuffer);
1297     }
1298 
1299     memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1300     memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1301     vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &memBarrier, 0, DE_NULL,
1302                           0, DE_NULL);
1303 
1304     const VkBufferImageCopy copyRegion = makeBufferImageCopy(
1305         makeExtent3D(DIM, DIM, 1u), makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
1306     vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);
1307 
1308     endCommandBuffer(vk, *cmdBuffer);
1309 
1310     submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1311 
1312     uint32_t *ptr = (uint32_t *)copyBuffer->getAllocation().getHostPtr();
1313     invalidateAlloc(vk, device, copyBuffer->getAllocation());
1314 
1315     qpTestResult res = QP_TEST_RESULT_PASS;
1316 
1317     for (uint32_t i = 0; i < DIM * DIM; ++i)
1318     {
1319         if (ptr[i] != 1)
1320         {
1321             res = QP_TEST_RESULT_FAIL;
1322         }
1323     }
1324 
1325     return tcu::TestStatus(res, qpGetTestResultName(res));
1326 }
1327 
1328 class CaptureReplayTestCase : public TestCase
1329 {
1330 public:
1331     CaptureReplayTestCase(tcu::TestContext &context, const char *name, uint32_t seed);
1332     ~CaptureReplayTestCase(void);
initPrograms(SourceCollections & programCollection) const1333     virtual void initPrograms(SourceCollections &programCollection) const
1334     {
1335         DE_UNREF(programCollection);
1336     }
1337     virtual TestInstance *createInstance(Context &context) const;
1338     virtual void checkSupport(Context &context) const;
1339 
1340 private:
1341     uint32_t m_seed;
1342 };
1343 
CaptureReplayTestCase(tcu::TestContext & context,const char * name,uint32_t seed)1344 CaptureReplayTestCase::CaptureReplayTestCase(tcu::TestContext &context, const char *name, uint32_t seed)
1345     : vkt::TestCase(context, name)
1346     , m_seed(seed)
1347 {
1348 }
1349 
~CaptureReplayTestCase(void)1350 CaptureReplayTestCase::~CaptureReplayTestCase(void)
1351 {
1352 }
1353 
checkSupport(Context & context) const1354 void CaptureReplayTestCase::checkSupport(Context &context) const
1355 {
1356     if (!context.isBufferDeviceAddressSupported())
1357         TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
1358 
1359 #ifndef CTS_USES_VULKANSC
1360     bool isBufferDeviceAddressWithCaptureReplaySupported =
1361         (context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") &&
1362          context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay) ||
1363         (context.isDeviceFunctionalitySupported("VK_EXT_buffer_device_address") &&
1364          context.getBufferDeviceAddressFeaturesEXT().bufferDeviceAddressCaptureReplay);
1365 #else
1366     bool isBufferDeviceAddressWithCaptureReplaySupported =
1367         (context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") &&
1368          context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay);
1369 #endif
1370 
1371     if (!isBufferDeviceAddressWithCaptureReplaySupported)
1372         TCU_THROW(NotSupportedError, "Capture/replay of physical storage buffer pointers not supported");
1373 }
1374 
1375 class CaptureReplayTestInstance : public TestInstance
1376 {
1377 public:
1378     CaptureReplayTestInstance(Context &context, uint32_t seed);
1379     ~CaptureReplayTestInstance(void);
1380     tcu::TestStatus iterate(void);
1381 
1382 private:
1383     uint32_t m_seed;
1384 };
1385 
CaptureReplayTestInstance(Context & context,uint32_t seed)1386 CaptureReplayTestInstance::CaptureReplayTestInstance(Context &context, uint32_t seed)
1387     : vkt::TestInstance(context)
1388     , m_seed(seed)
1389 {
1390 }
1391 
~CaptureReplayTestInstance(void)1392 CaptureReplayTestInstance::~CaptureReplayTestInstance(void)
1393 {
1394 }
1395 
createInstance(Context & context) const1396 TestInstance *CaptureReplayTestCase::createInstance(Context &context) const
1397 {
1398     return new CaptureReplayTestInstance(context, m_seed);
1399 }
1400 
iterate(void)1401 tcu::TestStatus CaptureReplayTestInstance::iterate(void)
1402 {
1403     const InstanceInterface &vki       = m_context.getInstanceInterface();
1404     const DeviceInterface &vk          = m_context.getDeviceInterface();
1405     const VkPhysicalDevice &physDevice = m_context.getPhysicalDevice();
1406     const VkDevice device              = m_context.getDevice();
1407     const bool useKHR                  = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
1408     de::Random rng(m_seed);
1409 
1410 #ifndef CTS_USES_VULKANSC
1411     VkBufferDeviceAddressCreateInfoEXT addressCreateInfoEXT = {
1412         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT, // VkStructureType  sType;
1413         DE_NULL,                                                 // const void*  pNext;
1414         0x000000000ULL,                                          // VkDeviceSize         deviceAddress
1415     };
1416 #endif
1417 
1418     VkBufferOpaqueCaptureAddressCreateInfo bufferOpaqueCaptureAddressCreateInfo = {
1419         VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO, // VkStructureType  sType;
1420         DE_NULL,                                                     // const void*  pNext;
1421         0x000000000ULL,                                              // VkDeviceSize         opaqueCaptureAddress
1422     };
1423 
1424     const uint32_t numBuffers = 100;
1425     std::vector<VkDeviceSize> bufferSizes(numBuffers);
1426     // random sizes, powers of two [4K, 4MB]
1427     for (uint32_t i = 0; i < numBuffers; ++i)
1428         bufferSizes[i] = 4096 << (rng.getUint32() % 11);
1429 
1430     std::vector<VkDeviceAddress> gpuAddrs(numBuffers);
1431     std::vector<uint64_t> opaqueBufferAddrs(numBuffers);
1432     std::vector<uint64_t> opaqueMemoryAddrs(numBuffers);
1433 
1434     VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {
1435         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType  sType;
1436         DE_NULL,                                      // const void*  pNext;
1437         0,                                            // VkBuffer             buffer
1438     };
1439 
1440     VkDeviceMemoryOpaqueCaptureAddressInfo deviceMemoryOpaqueCaptureAddressInfo = {
1441         VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO, // VkStructureType  sType;
1442         DE_NULL,                                                     // const void*  pNext;
1443         0,                                                           // VkDeviceMemory  memory;
1444     };
1445 
1446     vector<VkBufferSp> buffers(numBuffers);
1447     vector<AllocationSp> allocations(numBuffers);
1448 
1449     VkBufferCreateInfo bufferCreateInfo =
1450         makeBufferCreateInfo(DE_NULL, 0,
1451                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
1452                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
1453                              VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT);
1454 
1455     // VkMemoryAllocateFlags to be filled out later
1456     VkMemoryAllocateFlagsInfo allocFlagsInfo = {
1457         VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, //    VkStructureType    sType
1458         DE_NULL,                                      //    const void*        pNext
1459         0,                                            //    VkMemoryAllocateFlags    flags
1460         0,                                            //    uint32_t                 deviceMask
1461     };
1462 
1463     VkMemoryOpaqueCaptureAddressAllocateInfo memoryOpaqueCaptureAddressAllocateInfo = {
1464         VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO, // VkStructureType    sType;
1465         DE_NULL,                                                       // const void*        pNext;
1466         0,                                                             // uint64_t           opaqueCaptureAddress;
1467     };
1468 
1469     if (useKHR)
1470         allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
1471 
1472     if (useKHR)
1473     {
1474         allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
1475         allocFlagsInfo.pNext = &memoryOpaqueCaptureAddressAllocateInfo;
1476     }
1477 
1478     for (uint32_t i = 0; i < numBuffers; ++i)
1479     {
1480         bufferCreateInfo.size = bufferSizes[i];
1481         buffers[i]            = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
1482 
1483         // query opaque capture address before binding memory
1484         if (useKHR)
1485         {
1486             bufferDeviceAddressInfo.buffer = **buffers[i];
1487             opaqueBufferAddrs[i]           = vk.getBufferOpaqueCaptureAddress(device, &bufferDeviceAddressInfo);
1488         }
1489 
1490         allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device,
1491                                                        getBufferMemoryRequirements(vk, device, **buffers[i]),
1492                                                        MemoryRequirement::HostVisible, &allocFlagsInfo));
1493 
1494         if (useKHR)
1495         {
1496             deviceMemoryOpaqueCaptureAddressInfo.memory = allocations[i]->getMemory();
1497             opaqueMemoryAddrs[i] =
1498                 vk.getDeviceMemoryOpaqueCaptureAddress(device, &deviceMemoryOpaqueCaptureAddressInfo);
1499         }
1500 
1501         VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));
1502     }
1503 
1504     for (uint32_t i = 0; i < numBuffers; ++i)
1505     {
1506         bufferDeviceAddressInfo.buffer = **buffers[i];
1507         gpuAddrs[i]                    = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
1508     }
1509     buffers.clear();
1510     buffers.resize(numBuffers);
1511     allocations.clear();
1512     allocations.resize(numBuffers);
1513 
1514 #ifndef CTS_USES_VULKANSC
1515     bufferCreateInfo.pNext = useKHR ? (void *)&bufferOpaqueCaptureAddressCreateInfo : (void *)&addressCreateInfoEXT;
1516 #else
1517     bufferCreateInfo.pNext = (void *)&bufferOpaqueCaptureAddressCreateInfo;
1518 #endif
1519 
1520     for (int32_t i = numBuffers - 1; i >= 0; --i)
1521     {
1522 #ifndef CTS_USES_VULKANSC
1523         addressCreateInfoEXT.deviceAddress = gpuAddrs[i];
1524 #endif
1525         bufferOpaqueCaptureAddressCreateInfo.opaqueCaptureAddress   = opaqueBufferAddrs[i];
1526         memoryOpaqueCaptureAddressAllocateInfo.opaqueCaptureAddress = opaqueMemoryAddrs[i];
1527 
1528         bufferCreateInfo.size = bufferSizes[i];
1529         buffers[i]            = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
1530         allocations[i]        = AllocationSp(allocateExtended(vki, vk, physDevice, device,
1531                                                               getBufferMemoryRequirements(vk, device, **buffers[i]),
1532                                                               MemoryRequirement::HostVisible, &allocFlagsInfo));
1533         VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));
1534 
1535         bufferDeviceAddressInfo.buffer = **buffers[i];
1536         VkDeviceSize newAddr           = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
1537 
1538         if (newAddr != gpuAddrs[i])
1539             return tcu::TestStatus(QP_TEST_RESULT_FAIL, "address mismatch");
1540     }
1541 
1542     return tcu::TestStatus(QP_TEST_RESULT_PASS, qpGetTestResultName(QP_TEST_RESULT_PASS));
1543 }
1544 
1545 } // namespace
1546 
createBufferDeviceAddressTests(tcu::TestContext & testCtx)1547 tcu::TestCaseGroup *createBufferDeviceAddressTests(tcu::TestContext &testCtx)
1548 {
1549     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "buffer_device_address"));
1550 
1551     typedef struct
1552     {
1553         uint32_t count;
1554         const char *name;
1555     } TestGroupCase;
1556 
1557     TestGroupCase setCases[] = {
1558         {0, "set0"}, {3, "set3"}, {7, "set7"}, {15, "set15"}, {31, "set31"},
1559     };
1560 
1561     TestGroupCase depthCases[] = {
1562         {1, "depth1"},
1563         {2, "depth2"},
1564         {3, "depth3"},
1565     };
1566 
1567     TestGroupCase baseCases[] = {
1568         {BASE_UBO, "baseubo"},
1569         {BASE_SSBO, "basessbo"},
1570     };
1571 
1572     TestGroupCase cvtCases[] = {
1573         // load reference
1574         {CONVERT_NONE, "load"},
1575         // load and convert reference
1576         {CONVERT_UINT64, "convert"},
1577         // load and convert reference to uvec2
1578         {CONVERT_UVEC2, "convertuvec2"},
1579         // load, convert and compare references as uint64_t
1580         {CONVERT_U64CMP, "convertchecku64"},
1581         // load, convert and compare references as uvec2
1582         {CONVERT_UVEC2CMP, "convertcheckuv2"},
1583         // load reference as uint64_t and convert it to uvec2
1584         {CONVERT_UVEC2TOU64, "crossconvertu2p"},
1585         // load reference as uvec2 and convert it to uint64_t
1586         {CONVERT_U64TOUVEC2, "crossconvertp2u"},
1587     };
1588 
1589     TestGroupCase storeCases[] = {
1590         // don't store intermediate reference
1591         {0, "nostore"},
1592         // store intermediate reference
1593         {1, "store"},
1594     };
1595 
1596     TestGroupCase btCases[] = {
1597         // single buffer
1598         {BT_SINGLE, "single"},
1599         // multiple buffers
1600         {BT_MULTI, "multi"},
1601         // multiple buffers and VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT
1602         {BT_REPLAY, "replay"},
1603     };
1604 
1605     TestGroupCase layoutCases[] = {
1606         {LAYOUT_STD140, "std140"},
1607         {LAYOUT_SCALAR, "scalar"},
1608     };
1609 
1610     TestGroupCase stageCases[] = {
1611         {STAGE_COMPUTE, "comp"},
1612         {STAGE_FRAGMENT, "frag"},
1613         {STAGE_VERTEX, "vert"},
1614 #if ENABLE_RAYTRACING
1615         // raygen
1616         {STAGE_RAYGEN, "rgen"},
1617 #endif
1618     };
1619 
1620     TestGroupCase offsetCases[] = {
1621         {OFFSET_ZERO, "offset_zero"},
1622         {OFFSET_NONZERO, "offset_nonzero"},
1623     };
1624 
1625     for (int setNdx = 0; setNdx < DE_LENGTH_OF_ARRAY(setCases); setNdx++)
1626     {
1627         de::MovePtr<tcu::TestCaseGroup> setGroup(new tcu::TestCaseGroup(testCtx, setCases[setNdx].name));
1628         for (int depthNdx = 0; depthNdx < DE_LENGTH_OF_ARRAY(depthCases); depthNdx++)
1629         {
1630             de::MovePtr<tcu::TestCaseGroup> depthGroup(new tcu::TestCaseGroup(testCtx, depthCases[depthNdx].name));
1631             for (int baseNdx = 0; baseNdx < DE_LENGTH_OF_ARRAY(baseCases); baseNdx++)
1632             {
1633                 de::MovePtr<tcu::TestCaseGroup> baseGroup(new tcu::TestCaseGroup(testCtx, baseCases[baseNdx].name));
1634                 for (int cvtNdx = 0; cvtNdx < DE_LENGTH_OF_ARRAY(cvtCases); cvtNdx++)
1635                 {
1636                     de::MovePtr<tcu::TestCaseGroup> cvtGroup(new tcu::TestCaseGroup(testCtx, cvtCases[cvtNdx].name));
1637                     for (int storeNdx = 0; storeNdx < DE_LENGTH_OF_ARRAY(storeCases); storeNdx++)
1638                     {
1639                         de::MovePtr<tcu::TestCaseGroup> storeGroup(
1640                             new tcu::TestCaseGroup(testCtx, storeCases[storeNdx].name));
1641                         for (int btNdx = 0; btNdx < DE_LENGTH_OF_ARRAY(btCases); btNdx++)
1642                         {
1643                             de::MovePtr<tcu::TestCaseGroup> btGroup(
1644                                 new tcu::TestCaseGroup(testCtx, btCases[btNdx].name));
1645                             for (int layoutNdx = 0; layoutNdx < DE_LENGTH_OF_ARRAY(layoutCases); layoutNdx++)
1646                             {
1647                                 de::MovePtr<tcu::TestCaseGroup> layoutGroup(
1648                                     new tcu::TestCaseGroup(testCtx, layoutCases[layoutNdx].name));
1649                                 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1650                                 {
1651                                     for (int offsetNdx = 0; offsetNdx < DE_LENGTH_OF_ARRAY(offsetCases); offsetNdx++)
1652                                     {
1653                                         CaseDef c = {
1654                                             setCases[setNdx].count,                     // uint32_t set;
1655                                             depthCases[depthNdx].count,                 // uint32_t depth;
1656                                             (Base)baseCases[baseNdx].count,             // Base base;
1657                                             (Stage)stageCases[stageNdx].count,          // Stage stage;
1658                                             (Convert)cvtCases[cvtNdx].count,            // Convert convertUToPtr;
1659                                             !!storeCases[storeNdx].count,               // bool storeInLocal;
1660                                             (BufType)btCases[btNdx].count,              // BufType bufType;
1661                                             (Layout)layoutCases[layoutNdx].count,       // Layout layout;
1662                                             (MemoryOffset)offsetCases[offsetNdx].count, // Memory Offset;
1663                                         };
1664 
1665                                         // Skip more complex test cases for most descriptor sets, to reduce runtime.
1666                                         if (c.set != 3 && (c.depth == 3 || c.layout != LAYOUT_STD140))
1667                                             continue;
1668 
1669                                         // Memory offset tests are only for single buffer test cases.
1670                                         if (c.memoryOffset == OFFSET_NONZERO && c.bufType != BT_SINGLE)
1671                                             continue;
1672 
1673                                         std::ostringstream caseName;
1674                                         caseName << stageCases[stageNdx].name;
1675                                         if (c.memoryOffset == OFFSET_NONZERO)
1676                                             caseName << "_offset_nonzero";
1677 
1678                                         layoutGroup->addChild(
1679                                             new BufferAddressTestCase(testCtx, caseName.str().c_str(), c));
1680                                     }
1681                                 }
1682                                 btGroup->addChild(layoutGroup.release());
1683                             }
1684                             storeGroup->addChild(btGroup.release());
1685                         }
1686                         cvtGroup->addChild(storeGroup.release());
1687                     }
1688                     baseGroup->addChild(cvtGroup.release());
1689                 }
1690                 depthGroup->addChild(baseGroup.release());
1691             }
1692             setGroup->addChild(depthGroup.release());
1693         }
1694         group->addChild(setGroup.release());
1695     }
1696 
1697     de::MovePtr<tcu::TestCaseGroup> capGroup(new tcu::TestCaseGroup(testCtx, "capture_replay_stress"));
1698     for (uint32_t i = 0; i < 10; ++i)
1699     {
1700         capGroup->addChild(new CaptureReplayTestCase(testCtx, (std::string("seed_") + de::toString(i)).c_str(), i));
1701     }
1702     group->addChild(capGroup.release());
1703     return group.release();
1704 }
1705 
1706 } // namespace BindingModel
1707 } // namespace vkt
1708