/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesImageMemoryAliasing.cpp
 * \brief Sparse image memory aliasing tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesImageMemoryAliasing.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"

#include "tcuTexture.hpp"
#include "tcuTextureUtil.hpp"
#include "tcuTexVerifierUtil.hpp"

#include "deMath.h"
#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

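// Divisor for the per-texel reference values written by the compute shaders and
// checked at readback; 127 is prime, so index % MODULO_DIVISOR cycles through all
// residues and neighbouring texels receive distinct values.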
const uint32_t MODULO_DIVISOR = 127;

const std::string getCoordStr(const ImageType imageType, const std::string &x, const std::string &y,
                              const std::string &z)
{
    switch (imageType)
    {
    case IMAGE_TYPE_1D:
    case IMAGE_TYPE_BUFFER:
        return x;

    case IMAGE_TYPE_1D_ARRAY:
    case IMAGE_TYPE_2D:
        return "ivec2(" + x + "," + y + ")";

    case IMAGE_TYPE_2D_ARRAY:
    case IMAGE_TYPE_3D:
    case IMAGE_TYPE_CUBE:
    case IMAGE_TYPE_CUBE_ARRAY:
        return "ivec3(" + x + "," + y + "," + z + ")";

    default:
        DE_FATAL("Unexpected image type");
        return "";
    }
}

class ImageSparseMemoryAliasingCase : public TestCase
{
public:
    ImageSparseMemoryAliasingCase(tcu::TestContext &testCtx, const std::string &name, const ImageType imageType,
                                  const tcu::UVec3 &imageSize, const VkFormat format,
                                  const glu::GLSLVersion glslVersion, const bool useDeviceGroups);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    const bool m_useDeviceGroups;
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const VkFormat m_format;
    const glu::GLSLVersion m_glslVersion;
};

ImageSparseMemoryAliasingCase::ImageSparseMemoryAliasingCase(tcu::TestContext &testCtx, const std::string &name,
                                                             const ImageType imageType, const tcu::UVec3 &imageSize,
                                                             const VkFormat format, const glu::GLSLVersion glslVersion,
                                                             const bool useDeviceGroups)
    : TestCase(testCtx, name)
    , m_useDeviceGroups(useDeviceGroups)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_glslVersion(glslVersion)
{
}

void ImageSparseMemoryAliasingCase::checkSupport(Context &context) const
{
    const InstanceInterface &instance     = context.getInstanceInterface();
    const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();

    context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);

    // Check if image size does not exceed device limits
    if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
        TCU_THROW(NotSupportedError, "Image size not supported for device");

    // Check if device supports sparse operations for image type
    if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
        TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");

    if (formatIsR64(m_format))
    {
        context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

        if (context.getShaderImageAtomicInt64FeaturesEXT().shaderImageInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "shaderImageInt64Atomics is not supported");
        }

        if (context.getShaderImageAtomicInt64FeaturesEXT().sparseImageInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "sparseImageInt64Atomics is not supported for device");
        }
    }
}

class ImageSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
{
public:
    ImageSparseMemoryAliasingInstance(Context &context, const ImageType imageType, const tcu::UVec3 &imageSize,
                                      const VkFormat format, const bool useDeviceGroups);

    tcu::TestStatus iterate(void);

private:
    const bool m_useDeviceGroups;
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const VkFormat m_format;
};

ImageSparseMemoryAliasingInstance::ImageSparseMemoryAliasingInstance(Context &context, const ImageType imageType,
                                                                     const tcu::UVec3 &imageSize, const VkFormat format,
                                                                     const bool useDeviceGroups)
    : SparseResourcesBaseInstance(context, useDeviceGroups)
    , m_useDeviceGroups(useDeviceGroups)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
{
}

tcu::TestStatus ImageSparseMemoryAliasingInstance::iterate(void)
{
    const float epsilon               = 1e-5f;
    const InstanceInterface &instance = m_context.getInstanceInterface();

    {
        // Create logical device supporting both sparse and compute queues
        QueueRequirementsVec queueRequirements;
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

        createDeviceSupportingQueues(queueRequirements, formatIsR64(m_format));
    }

    const VkPhysicalDevice physicalDevice  = getPhysicalDevice();
    const tcu::UVec3 maxWorkGroupSize      = tcu::UVec3(128u, 128u, 64u);
    const tcu::UVec3 maxWorkGroupCount     = tcu::UVec3(65535u, 65535u, 65535u);
    const uint32_t maxWorkGroupInvocations = 128u;
    VkImageCreateInfo imageSparseInfo;
    std::vector<DeviceMemorySp> deviceMemUniquePtrVec;

    // TODO (vsk): Getting the queues should be moved outside the physical-device loop;
    // the same applies to the other sparse image test files.

    const DeviceInterface &deviceInterface          = getDeviceInterface();
    const Queue &sparseQueue                        = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
    const Queue &computeQueue                       = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
    const PlanarFormatDescription formatDescription = getPlanarFormatDescription(m_format);

    // Go through all physical devices
    for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
    {
        const uint32_t firstDeviceID  = physDevID;
        const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

        imageSparseInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
        imageSparseInfo.pNext = DE_NULL;
        imageSparseInfo.flags = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_ALIASED_BIT |
                                VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
        imageSparseInfo.imageType     = mapImageType(m_imageType);
        imageSparseInfo.format        = m_format;
        imageSparseInfo.extent        = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
        imageSparseInfo.arrayLayers   = getNumLayers(m_imageType, m_imageSize);
        imageSparseInfo.samples       = VK_SAMPLE_COUNT_1_BIT;
        imageSparseInfo.tiling        = VK_IMAGE_TILING_OPTIMAL;
        imageSparseInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        imageSparseInfo.usage =
            VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
        imageSparseInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
        imageSparseInfo.queueFamilyIndexCount = 0u;
        imageSparseInfo.pQueueFamilyIndices   = DE_NULL;

        if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
            imageSparseInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;

        // Check if device supports sparse operations for image format
        if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageSparseInfo))
            TCU_THROW(NotSupportedError, "The image format does not support sparse operations");

        {
            // Assign maximum allowed mipmap levels to image
            VkImageFormatProperties imageFormatProperties;
            if (instance.getPhysicalDeviceImageFormatProperties(
                    physicalDevice, imageSparseInfo.format, imageSparseInfo.imageType, imageSparseInfo.tiling,
                    imageSparseInfo.usage, imageSparseInfo.flags,
                    &imageFormatProperties) == VK_ERROR_FORMAT_NOT_SUPPORTED)
            {
                TCU_THROW(NotSupportedError, "Image format does not support sparse operations");
            }

            imageSparseInfo.mipLevels =
                getMipmapCount(m_format, formatDescription, imageFormatProperties, imageSparseInfo.extent);
        }

        // Create sparse image
        const Unique<VkImage> imageRead(createImage(deviceInterface, getDevice(), &imageSparseInfo));
        const Unique<VkImage> imageWrite(createImage(deviceInterface, getDevice(), &imageSparseInfo));
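        // Both images use identical create info and VK_IMAGE_CREATE_SPARSE_ALIASED_BIT; their
        // non-mip-tail levels are bound to the same memory allocations below, so writes through
        // imageWrite are expected to become visible when reading through imageRead.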

        // Create semaphores to synchronize sparse binding operations with other operations on the sparse images
        const Unique<VkSemaphore> memoryBindSemaphoreTransfer(createSemaphore(deviceInterface, getDevice()));
        const Unique<VkSemaphore> memoryBindSemaphoreCompute(createSemaphore(deviceInterface, getDevice()));

        const VkSemaphore imageMemoryBindSemaphores[] = {memoryBindSemaphoreTransfer.get(),
                                                         memoryBindSemaphoreCompute.get()};

        std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements;

        {
            // Get sparse image general memory requirements
            const VkMemoryRequirements imageMemoryRequirements =
                getImageMemoryRequirements(deviceInterface, getDevice(), *imageRead);

            // Check if required image memory size does not exceed device limits
            if (imageMemoryRequirements.size >
                getPhysicalDeviceProperties(instance, getPhysicalDevice(secondDeviceID)).limits.sparseAddressSpaceSize)
                TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");

            DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);

            const uint32_t memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID),
                                                               imageMemoryRequirements, MemoryRequirement::Any);

            if (memoryType == NO_MATCH_FOUND)
                return tcu::TestStatus::fail("No matching memory type found");

            if (firstDeviceID != secondDeviceID)
            {
                VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                const uint32_t heapIndex =
                    getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
                deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID,
                                                                 &peerMemoryFeatureFlags);

                if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT) == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
                {
                    TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC, COPY_DST, and GENERIC_DST");
                }
            }

            // Get sparse image sparse memory requirements
            sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *imageRead);

            DE_ASSERT(sparseMemoryRequirements.size() != 0);

            std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
            std::vector<VkSparseMemoryBind> imageReadMipTailBinds;
            std::vector<VkSparseMemoryBind> imageWriteMipTailBinds;

            for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            {
                const VkImageAspectFlags aspect =
                    (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
                const uint32_t aspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, aspect);

                if (aspectIndex == NO_MATCH_FOUND)
                    TCU_THROW(NotSupportedError, "Not supported image aspect");

                VkSparseImageMemoryRequirements aspectRequirements = sparseMemoryRequirements[aspectIndex];

                DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);

                VkExtent3D imageGranularity = aspectRequirements.formatProperties.imageGranularity;

                // Bind memory for each layer
                for (uint32_t layerNdx = 0; layerNdx < imageSparseInfo.arrayLayers; ++layerNdx)
                {
                    for (uint32_t mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
                    {
                        const VkExtent3D mipExtent =
                            getPlaneExtent(formatDescription, imageSparseInfo.extent, planeNdx, mipLevelNdx);
                        const tcu::UVec3 sparseBlocks        = alignedDivide(mipExtent, imageGranularity);
                        const uint32_t numSparseBlocks       = sparseBlocks.x() * sparseBlocks.y() * sparseBlocks.z();
                        const VkImageSubresource subresource = {aspect, mipLevelNdx, layerNdx};

                        const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(
                            deviceInterface, getDevice(), imageMemoryRequirements.alignment * numSparseBlocks,
                            memoryType, subresource, makeOffset3D(0u, 0u, 0u), mipExtent);

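                        // Take ownership of the VkDeviceMemory allocated by makeSparseImageMemoryBind()
                        // so it is released when the test instance is destroyed.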
                        deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
                            Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory),
                                                 Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                        imageResidencyMemoryBinds.push_back(imageMemoryBind);
                    }

                    if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) &&
                        aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
                    {
                        const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(
                            deviceInterface, getDevice(), aspectRequirements.imageMipTailSize, memoryType,
                            aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

                        deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
                            Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory),
                                                 Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                        imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);

                        const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(
                            deviceInterface, getDevice(), aspectRequirements.imageMipTailSize, memoryType,
                            aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

                        deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
                            Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory),
                                                 Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                        imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
                    }
                }

                if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) &&
                    aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
                {
                    const VkSparseMemoryBind imageReadMipTailMemoryBind =
                        makeSparseMemoryBind(deviceInterface, getDevice(), aspectRequirements.imageMipTailSize,
                                             memoryType, aspectRequirements.imageMipTailOffset);

                    deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
                        Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory),
                                             Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                    imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);

                    const VkSparseMemoryBind imageWriteMipTailMemoryBind =
                        makeSparseMemoryBind(deviceInterface, getDevice(), aspectRequirements.imageMipTailSize,
                                             memoryType, aspectRequirements.imageMipTailOffset);

                    deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
                        Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory),
                                             Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                    imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
                }
            }

            const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
                VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, //VkStructureType sType;
                DE_NULL,                                         //const void* pNext;
                firstDeviceID,                                   //uint32_t resourceDeviceIndex;
                secondDeviceID,                                  //uint32_t memoryDeviceIndex;
            };

            VkBindSparseInfo bindSparseInfo = {
                VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                    //VkStructureType sType;
                m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL, //const void* pNext;
                0u,                                                    //uint32_t waitSemaphoreCount;
                DE_NULL,                                               //const VkSemaphore* pWaitSemaphores;
                0u,                                                    //uint32_t bufferBindCount;
                DE_NULL,                  //const VkSparseBufferMemoryBindInfo* pBufferBinds;
                0u,                       //uint32_t imageOpaqueBindCount;
                DE_NULL,                  //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
                0u,                       //uint32_t imageBindCount;
                DE_NULL,                  //const VkSparseImageMemoryBindInfo* pImageBinds;
                2u,                       //uint32_t signalSemaphoreCount;
                imageMemoryBindSemaphores //const VkSemaphore* pSignalSemaphores;
            };

            VkSparseImageMemoryBindInfo imageResidencyBindInfo[2];
            VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo[2];

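            // Bind the same residency allocations to both images: everything below the
            // mip tail ends up aliased between imageRead and imageWrite.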
            if (imageResidencyMemoryBinds.size() > 0)
            {
                imageResidencyBindInfo[0].image     = *imageRead;
                imageResidencyBindInfo[0].bindCount = static_cast<uint32_t>(imageResidencyMemoryBinds.size());
                imageResidencyBindInfo[0].pBinds    = imageResidencyMemoryBinds.data();

                imageResidencyBindInfo[1].image     = *imageWrite;
                imageResidencyBindInfo[1].bindCount = static_cast<uint32_t>(imageResidencyMemoryBinds.size());
                imageResidencyBindInfo[1].pBinds    = imageResidencyMemoryBinds.data();

                bindSparseInfo.imageBindCount = 2u;
                bindSparseInfo.pImageBinds    = imageResidencyBindInfo;
            }

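            // The mip tails, in contrast, were given separate allocations per image above,
            // so the tail levels of imageRead and imageWrite do not alias.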
            if (imageReadMipTailBinds.size() > 0)
            {
                imageMipTailBindInfo[0].image     = *imageRead;
                imageMipTailBindInfo[0].bindCount = static_cast<uint32_t>(imageReadMipTailBinds.size());
                imageMipTailBindInfo[0].pBinds    = imageReadMipTailBinds.data();

                imageMipTailBindInfo[1].image     = *imageWrite;
                imageMipTailBindInfo[1].bindCount = static_cast<uint32_t>(imageWriteMipTailBinds.size());
                imageMipTailBindInfo[1].pBinds    = imageWriteMipTailBinds.data();

                bindSparseInfo.imageOpaqueBindCount = 2u;
                bindSparseInfo.pImageOpaqueBinds    = imageMipTailBindInfo;
            }

            // Submit sparse bind commands for execution
            VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
        }
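
        // The bind operation signals both semaphores; the transfer/compute submission below
        // waits on them, so no work touches the images before their memory is bound.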

        uint32_t imageSizeInBytes = 0;
        std::vector<std::vector<uint32_t>> planeOffsets(imageSparseInfo.mipLevels);
        std::vector<std::vector<uint32_t>> planeRowPitches(imageSparseInfo.mipLevels);

        for (uint32_t mipmapNdx = 0; mipmapNdx < imageSparseInfo.mipLevels; ++mipmapNdx)
        {
            planeOffsets[mipmapNdx].resize(formatDescription.numPlanes, 0);
            planeRowPitches[mipmapNdx].resize(formatDescription.numPlanes, 0);
        }

        for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
        {
            for (uint32_t mipmapNdx = 0; mipmapNdx < imageSparseInfo.mipLevels; ++mipmapNdx)
            {
                const tcu::UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize, mipmapNdx);
                planeOffsets[mipmapNdx][planeNdx] = imageSizeInBytes;
                const uint32_t planeW =
                    gridSize.x() / (formatDescription.blockWidth * formatDescription.planes[planeNdx].widthDivisor);
                planeRowPitches[mipmapNdx][planeNdx] = formatDescription.planes[planeNdx].elementSizeBytes * planeW;
                imageSizeInBytes +=
                    getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, formatDescription,
                                                planeNdx, mipmapNdx, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
            }
        }

        std::vector<VkBufferImageCopy> bufferImageCopy(formatDescription.numPlanes * imageSparseInfo.mipLevels);
        {
            uint32_t bufferOffset = 0;

            for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            {
                const VkImageAspectFlags aspect =
                    (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;

                for (uint32_t mipmapNdx = 0; mipmapNdx < imageSparseInfo.mipLevels; ++mipmapNdx)
                {
                    bufferImageCopy[planeNdx * imageSparseInfo.mipLevels + mipmapNdx] = {
                        bufferOffset, // VkDeviceSize bufferOffset;
                        0u,           // uint32_t bufferRowLength;
                        0u,           // uint32_t bufferImageHeight;
                        makeImageSubresourceLayers(
                            aspect, mipmapNdx, 0u,
                            imageSparseInfo.arrayLayers), // VkImageSubresourceLayers imageSubresource;
                        makeOffset3D(0, 0, 0),            // VkOffset3D imageOffset;
                        vk::getPlaneExtent(formatDescription, imageSparseInfo.extent, planeNdx,
                                           mipmapNdx) // VkExtent3D imageExtent;
                    };
                    bufferOffset += getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers,
                                                                formatDescription, planeNdx, mipmapNdx,
                                                                BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
                }
            }
        }

        // Create command buffer for compute and transfer operations
        const Unique<VkCommandPool> commandPool(
            makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
        const Unique<VkCommandBuffer> commandBuffer(
            allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands
        beginCommandBuffer(deviceInterface, *commandBuffer);

        const VkBufferCreateInfo inputBufferCreateInfo =
            makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
        const Unique<VkBuffer> inputBuffer(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
        const de::UniquePtr<Allocation> inputBufferAlloc(
            bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));

        std::vector<uint8_t> referenceData(imageSizeInBytes);

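        // Fill every byte of mip level N (all planes) with the value N + 1 so each
        // level carries a distinct, easily verifiable pattern.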
        for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            for (uint32_t mipmapNdx = 0u; mipmapNdx < imageSparseInfo.mipLevels; ++mipmapNdx)
            {
                const uint32_t mipLevelSizeInBytes =
                    getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, formatDescription,
                                                planeNdx, mipmapNdx, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
                const uint32_t bufferOffset = static_cast<uint32_t>(
                    bufferImageCopy[planeNdx * imageSparseInfo.mipLevels + mipmapNdx].bufferOffset);

                deMemset(&referenceData[bufferOffset], mipmapNdx + 1u, mipLevelSizeInBytes);
            }

        deMemcpy(inputBufferAlloc->getHostPtr(), referenceData.data(), imageSizeInBytes);

        flushAlloc(deviceInterface, getDevice(), *inputBufferAlloc);

        {
            const VkBufferMemoryBarrier inputBufferBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *inputBuffer, 0u, imageSizeInBytes);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT,
                                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier,
                                               0u, DE_NULL);
        }

        {
            std::vector<VkImageMemoryBarrier> imageSparseTransferDstBarriers;

            for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            {
                const VkImageAspectFlags aspect =
                    (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;

                imageSparseTransferDstBarriers.emplace_back(makeImageMemoryBarrier(
                    0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    *imageRead,
                    makeImageSubresourceRange(aspect, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers),
                    sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex :
                                                                                    VK_QUEUE_FAMILY_IGNORED,
                    sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex :
                                                                                    VK_QUEUE_FAMILY_IGNORED));
            }

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL,
                                               static_cast<uint32_t>(imageSparseTransferDstBarriers.size()),
                                               imageSparseTransferDstBarriers.data());
        }

        deviceInterface.cmdCopyBufferToImage(*commandBuffer, *inputBuffer, *imageRead,
                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                             static_cast<uint32_t>(bufferImageCopy.size()), bufferImageCopy.data());
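
        // imageRead now holds the reference pattern in every mip level, including the mip tail.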

        {
            std::vector<VkImageMemoryBarrier> imageSparseTransferSrcBarriers;

            for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            {
                const VkImageAspectFlags aspect =
                    (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;

                imageSparseTransferSrcBarriers.emplace_back(makeImageMemoryBarrier(
                    VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *imageRead,
                    makeImageSubresourceRange(aspect, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)));
            }

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL,
                                               static_cast<uint32_t>(imageSparseTransferSrcBarriers.size()),
                                               imageSparseTransferSrcBarriers.data());
        }

        {
            std::vector<VkImageMemoryBarrier> imageSparseShaderStorageBarriers;

            for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            {
                const VkImageAspectFlags aspect =
                    (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;

                imageSparseShaderStorageBarriers.emplace_back(makeImageMemoryBarrier(
                    0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *imageWrite,
                    makeImageSubresourceRange(aspect, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)));
            }

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL,
                                               static_cast<uint32_t>(imageSparseShaderStorageBarriers.size()),
                                               imageSparseShaderStorageBarriers.data());
        }

        // Create descriptor set layout
        const Unique<VkDescriptorSetLayout> descriptorSetLayout(
            DescriptorSetLayoutBuilder()
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
                .build(deviceInterface, getDevice()));

        Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));

        Unique<VkDescriptorPool> descriptorPool(
            DescriptorPoolBuilder()
                .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, imageSparseInfo.mipLevels)
                .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
                       imageSparseInfo.mipLevels));

        typedef de::SharedPtr<Unique<VkImageView>> SharedVkImageView;
        std::vector<SharedVkImageView> imageViews;
        imageViews.resize(imageSparseInfo.mipLevels);

        typedef de::SharedPtr<Unique<VkDescriptorSet>> SharedVkDescriptorSet;
        std::vector<SharedVkDescriptorSet> descriptorSets;
        descriptorSets.resize(imageSparseInfo.mipLevels);

        typedef de::SharedPtr<Unique<VkPipeline>> SharedVkPipeline;
        std::vector<SharedVkPipeline> computePipelines;
        computePipelines.resize(imageSparseInfo.mipLevels);

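        // For each mip level, dispatch a compute shader that writes the expected pattern
        // through imageWrite; on the aliased (non-mip-tail) levels these writes replace the
        // transferred reference data seen through imageRead.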
        for (uint32_t mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
        {
            std::ostringstream name;
            name << "comp" << mipLevelNdx;

            // Create and bind compute pipeline
            Unique<VkShaderModule> shaderModule(createShaderModule(
                deviceInterface, getDevice(), m_context.getBinaryCollection().get(name.str()), DE_NULL));

            computePipelines[mipLevelNdx] =
                makeVkSharedPtr(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
            VkPipeline computePipeline = **computePipelines[mipLevelNdx];

            deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline);

            // Create and bind descriptor set
            descriptorSets[mipLevelNdx] =
                makeVkSharedPtr(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
            VkDescriptorSet descriptorSet = **descriptorSets[mipLevelNdx];

            // Select which mipmap level to bind
            const VkImageSubresourceRange subresourceRange =
                makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, mipLevelNdx, 1u, 0u, imageSparseInfo.arrayLayers);

            imageViews[mipLevelNdx] =
                makeVkSharedPtr(makeImageView(deviceInterface, getDevice(), *imageWrite, mapImageViewType(m_imageType),
                                              imageSparseInfo.format, subresourceRange));
            VkImageView imageView = **imageViews[mipLevelNdx];

            const VkDescriptorImageInfo descriptorImageSparseInfo =
                makeDescriptorImageInfo(DE_NULL, imageView, VK_IMAGE_LAYOUT_GENERAL);

            DescriptorSetUpdateBuilder()
                .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                             VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageSparseInfo)
                .update(deviceInterface, getDevice());

            deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u,
                                                  1u, &descriptorSet, 0u, DE_NULL);

            const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
            const uint32_t xWorkGroupSize =
                std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
            const uint32_t yWorkGroupSize =
                std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
            const uint32_t zWorkGroupSize = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()),
                                                     maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));

            const uint32_t xWorkGroupCount = gridSize.x() / xWorkGroupSize + (gridSize.x() % xWorkGroupSize ? 1u : 0u);
            const uint32_t yWorkGroupCount = gridSize.y() / yWorkGroupSize + (gridSize.y() % yWorkGroupSize ? 1u : 0u);
            const uint32_t zWorkGroupCount = gridSize.z() / zWorkGroupSize + (gridSize.z() % zWorkGroupSize ? 1u : 0u);
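
            // Work group counts are rounded up so the whole grid is covered; the generated
            // shaders bounds-check gl_GlobalInvocationID, so the overshoot is harmless.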

            if (maxWorkGroupCount.x() < xWorkGroupCount || maxWorkGroupCount.y() < yWorkGroupCount ||
                maxWorkGroupCount.z() < zWorkGroupCount)
            {
                TCU_THROW(NotSupportedError, "Image size is not supported");
            }

            deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
        }

        {
            const VkMemoryBarrier memoryBarrier =
                makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &memoryBarrier, 0u, DE_NULL, 0u,
                                               DE_NULL);
        }

        const VkBufferCreateInfo outputBufferCreateInfo =
            makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        const Unique<VkBuffer> outputBuffer(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
        const de::UniquePtr<Allocation> outputBufferAlloc(
            bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

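        // Read back through imageRead: the aliased (non-mip-tail) levels should now contain
        // the compute shader output, while the separately bound mip tail should still hold
        // the original reference data.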
        deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *imageRead, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                             *outputBuffer, static_cast<uint32_t>(bufferImageCopy.size()),
                                             bufferImageCopy.data());

        {
            const VkBufferMemoryBarrier outputBufferBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0u, imageSizeInBytes);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                               VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier,
                                               0u, DE_NULL);
        }

        // End recording commands
        endCommandBuffer(deviceInterface, *commandBuffer);

        const VkPipelineStageFlags stageBits[] = {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT};

        // Submit commands for execution and wait for completion
        submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 2u,
                              imageMemoryBindSemaphores, stageBits, 0, DE_NULL, m_useDeviceGroups, firstDeviceID);

        // Retrieve data from buffer to host memory
        invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);

        uint8_t *outputData = static_cast<uint8_t *>(outputBufferAlloc->getHostPtr());

        std::vector<std::vector<void *>> planePointers(imageSparseInfo.mipLevels);

        for (uint32_t mipmapNdx = 0; mipmapNdx < imageSparseInfo.mipLevels; ++mipmapNdx)
            planePointers[mipmapNdx].resize(formatDescription.numPlanes);

        for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
            for (uint32_t mipmapNdx = 0; mipmapNdx < imageSparseInfo.mipLevels; ++mipmapNdx)
                planePointers[mipmapNdx][planeNdx] =
                    outputData + static_cast<size_t>(planeOffsets[mipmapNdx][planeNdx]);

        // Wait for sparse queue to become idle
        deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

        for (uint32_t channelNdx = 0; channelNdx < 4; ++channelNdx)
        {
            if (!formatDescription.hasChannelNdx(channelNdx))
                continue;

            uint32_t planeNdx = formatDescription.channels[channelNdx].planeNdx;
            const VkImageAspectFlags aspect =
                (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
            const uint32_t aspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, aspect);

            if (aspectIndex == NO_MATCH_FOUND)
                TCU_THROW(NotSupportedError, "Not supported image aspect");

            VkSparseImageMemoryRequirements aspectRequirements = sparseMemoryRequirements[aspectIndex];

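            // Levels below the mip tail alias imageWrite's memory, so they must contain the
            // values written by the compute shaders.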
            for (uint32_t mipmapNdx = 0; mipmapNdx < aspectRequirements.imageMipTailFirstLod; ++mipmapNdx)
            {
                const tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize, mipmapNdx);
                const tcu::ConstPixelBufferAccess pixelBuffer =
                    vk::getChannelAccess(formatDescription, gridSize, planeRowPitches[mipmapNdx].data(),
                                         (const void *const *)planePointers[mipmapNdx].data(), channelNdx);
                tcu::IVec3 pixelDivider = pixelBuffer.getDivider();

                for (uint32_t offsetZ = 0u; offsetZ < gridSize.z(); ++offsetZ)
                    for (uint32_t offsetY = 0u; offsetY < gridSize.y(); ++offsetY)
                        for (uint32_t offsetX = 0u; offsetX < gridSize.x(); ++offsetX)
                        {
                            const uint32_t index =
                                offsetX + gridSize.x() * offsetY + gridSize.x() * gridSize.y() * offsetZ;
                            uint32_t iReferenceValue;
                            float fReferenceValue;
                            float acceptableError = epsilon;

                            switch (channelNdx)
                            {
                            case 0:
                            case 1:
                            case 2:
                                iReferenceValue = index % MODULO_DIVISOR;
                                fReferenceValue =
                                    static_cast<float>(iReferenceValue) / static_cast<float>(MODULO_DIVISOR);
                                break;
                            case 3:
                                iReferenceValue = 1u;
                                fReferenceValue = 1.f;
                                break;
                            default:
                                DE_FATAL("Unexpected channel index");
                                break;
                            }

                            switch (formatDescription.channels[channelNdx].type)
                            {
                            case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
                            case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
                            {
                                const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(
                                    offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

                                if (outputValue.x() != iReferenceValue)
                                    return tcu::TestStatus::fail("Failed");

                                break;
                            }
                            case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
                            case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
                            {
                                float fixedPointError = tcu::TexVerifierUtil::computeFixedPointError(
                                    formatDescription.channels[channelNdx].sizeBits);
                                acceptableError += fixedPointError;
                                const tcu::Vec4 outputValue = pixelBuffer.getPixel(
                                    offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

                                if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
                                    return tcu::TestStatus::fail("Failed");

                                break;
                            }
                            case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
                            {
                                const tcu::Vec4 outputValue = pixelBuffer.getPixel(
                                    offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

                                if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
                                    return tcu::TestStatus::fail("Failed");

                                break;
                            }
                            default:
                                DE_FATAL("Unexpected channel type");
                                break;
                            }
                        }
            }

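            // The mip tail levels were bound to separate memory for each image, so imageRead's
            // tail must still match the reference data uploaded by the buffer-to-image copy.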
            for (uint32_t mipmapNdx = aspectRequirements.imageMipTailFirstLod; mipmapNdx < imageSparseInfo.mipLevels;
                 ++mipmapNdx)
            {
                const uint32_t mipLevelSizeInBytes = getImageMipLevelSizeInBytes(
                    imageSparseInfo.extent, imageSparseInfo.arrayLayers, formatDescription, planeNdx, mipmapNdx);
                const uint32_t bufferOffset = static_cast<uint32_t>(
                    bufferImageCopy[planeNdx * imageSparseInfo.mipLevels + mipmapNdx].bufferOffset);

                if (deMemCmp(outputData + bufferOffset, &referenceData[bufferOffset], mipLevelSizeInBytes) != 0)
                    return tcu::TestStatus::fail("Failed");
            }
        }
    }

    return tcu::TestStatus::pass("Passed");
}

void ImageSparseMemoryAliasingCase::initPrograms(SourceCollections &sourceCollections) const
{
    const char *const versionDecl                   = glu::getGLSLVersionDeclaration(m_glslVersion);
    const PlanarFormatDescription formatDescription = getPlanarFormatDescription(m_format);
    const std::string imageTypeStr                  = getShaderImageType(formatDescription, m_imageType);
    const std::string formatQualifierStr            = getShaderImageFormatQualifier(m_format);
    const std::string formatDataStr                 = getShaderImageDataType(formatDescription);
    const uint32_t maxWorkGroupInvocations          = 128u;
    const tcu::UVec3 maxWorkGroupSize               = tcu::UVec3(128u, 128u, 64u);
    VkExtent3D layerExtent                          = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
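    // No physical device is available at initPrograms() time, so a placeholder maxMipLevels
    // of 20 is used, large enough never to clamp getMipmapCount() here; shaders generated
    // for levels the real image turns out not to have are simply never requested by iterate().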
    VkImageFormatProperties imageFormatProperties;
    imageFormatProperties.maxMipLevels = 20;
    const uint32_t mipLevels = getMipmapCount(m_format, formatDescription, imageFormatProperties, layerExtent);

    std::ostringstream formatValueStr;
    switch (formatDescription.channels[0].type)
    {
    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        formatValueStr << "( index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", index % "
                       << MODULO_DIVISOR << ", 1)";
        break;
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        formatValueStr << "( float( index % " << MODULO_DIVISOR << ") / " << MODULO_DIVISOR << ".0, float( index % "
                       << MODULO_DIVISOR << ") / " << MODULO_DIVISOR << ".0, float( index % " << MODULO_DIVISOR
                       << ") / " << MODULO_DIVISOR << ".0, 1.0)";
        break;
    default:
        DE_FATAL("Unexpected channel type");
        break;
    }

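    // Generate one compute shader per mip level: the grid size, and therefore the local size
    // and the bounds checks baked into the source, differ for each level. The work group
    // sizes computed here must match those used for the dispatch in iterate().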
    for (uint32_t mipLevelNdx = 0; mipLevelNdx < mipLevels; ++mipLevelNdx)
    {
        // Create compute program
        const tcu::UVec3 gridSize     = getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
        const uint32_t xWorkGroupSize = std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
        const uint32_t yWorkGroupSize =
            std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
        const uint32_t zWorkGroupSize = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()),
                                                 maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));

        std::ostringstream src;

        src << versionDecl << "\n";
        if (formatIsR64(m_format))
        {
            src << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                << "#extension GL_EXT_shader_image_int64 : require\n";
        }
        src << "layout (local_size_x = " << xWorkGroupSize << ", local_size_y = " << yWorkGroupSize
            << ", local_size_z = " << zWorkGroupSize << ") in; \n"
            << "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr
            << " u_image;\n"
            << "void main (void)\n"
            << "{\n"
            << "    if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
            << "    if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
            << "    if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
            << "    {\n"
            << "        int index = int( gl_GlobalInvocationID.x + " << gridSize.x() << " * gl_GlobalInvocationID.y + "
            << gridSize.x() << " * " << gridSize.y() << " * gl_GlobalInvocationID.z );\n"
            << "        imageStore(u_image, "
            << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z")
            << "," << formatDataStr << formatValueStr.str() << "); \n"
            << "    }\n"
            << "}\n";

        std::ostringstream name;
        name << "comp" << mipLevelNdx;
        sourceCollections.glslSources.add(name.str()) << glu::ComputeSource(src.str());
    }
}

TestInstance *ImageSparseMemoryAliasingCase::createInstance(Context &context) const
{
    return new ImageSparseMemoryAliasingInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
}

} // namespace

tcu::TestCaseGroup *createImageSparseMemoryAliasingTestsCommon(tcu::TestContext &testCtx,
                                                               de::MovePtr<tcu::TestCaseGroup> testGroup,
                                                               const bool useDeviceGroup = false)
{

    const std::vector<TestImageParameters> imageParameters{
        {IMAGE_TYPE_2D,
         {tcu::UVec3(512u, 256u, 1u), tcu::UVec3(128u, 128u, 1u), tcu::UVec3(503u, 137u, 1u), tcu::UVec3(11u, 37u, 1u)},
         getTestFormats(IMAGE_TYPE_2D)},
        {IMAGE_TYPE_2D_ARRAY,
         {tcu::UVec3(512u, 256u, 6u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(503u, 137u, 3u), tcu::UVec3(11u, 37u, 3u)},
         getTestFormats(IMAGE_TYPE_2D_ARRAY)},
        {IMAGE_TYPE_CUBE,
         {tcu::UVec3(256u, 256u, 1u), tcu::UVec3(128u, 128u, 1u), tcu::UVec3(137u, 137u, 1u), tcu::UVec3(11u, 11u, 1u)},
         getTestFormats(IMAGE_TYPE_CUBE)},
        {IMAGE_TYPE_CUBE_ARRAY,
         {tcu::UVec3(256u, 256u, 6u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(137u, 137u, 3u), tcu::UVec3(11u, 11u, 3u)},
         getTestFormats(IMAGE_TYPE_CUBE_ARRAY)},
        {IMAGE_TYPE_3D,
         {tcu::UVec3(256u, 256u, 16u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(503u, 137u, 3u),
          tcu::UVec3(11u, 37u, 3u)},
         getTestFormats(IMAGE_TYPE_3D)}};

    for (size_t imageTypeNdx = 0; imageTypeNdx < imageParameters.size(); ++imageTypeNdx)
    {
        const ImageType imageType = imageParameters[imageTypeNdx].imageType;
        de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(
            new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));

        for (size_t formatNdx = 0; formatNdx < imageParameters[imageTypeNdx].formats.size(); ++formatNdx)
        {
            VkFormat format               = imageParameters[imageTypeNdx].formats[formatNdx].format;
            tcu::UVec3 imageSizeAlignment = getImageSizeAlignment(format);
            de::MovePtr<tcu::TestCaseGroup> formatGroup(
                new tcu::TestCaseGroup(testCtx, getImageFormatID(format).c_str()));

            for (size_t imageSizeNdx = 0; imageSizeNdx < imageParameters[imageTypeNdx].imageSizes.size();
                 ++imageSizeNdx)
            {
                const tcu::UVec3 imageSize = imageParameters[imageTypeNdx].imageSizes[imageSizeNdx];

                // skip test for images with odd sizes for some YCbCr formats
                if ((imageSize.x() % imageSizeAlignment.x()) != 0)
                    continue;
                if ((imageSize.y() % imageSizeAlignment.y()) != 0)
                    continue;

                std::ostringstream stream;
                stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();

                formatGroup->addChild(new ImageSparseMemoryAliasingCase(testCtx, stream.str(), imageType, imageSize,
                                                                        format, glu::GLSL_VERSION_440, useDeviceGroup));
            }
            imageTypeGroup->addChild(formatGroup.release());
        }
        testGroup->addChild(imageTypeGroup.release());
    }

    return testGroup.release();
}

tcu::TestCaseGroup *createImageSparseMemoryAliasingTests(tcu::TestContext &testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_memory_aliasing"));
    return createImageSparseMemoryAliasingTestsCommon(testCtx, testGroup);
}

tcu::TestCaseGroup *createDeviceGroupImageSparseMemoryAliasingTests(tcu::TestContext &testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(
        new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_memory_aliasing"));
    return createImageSparseMemoryAliasingTestsCommon(testCtx, testGroup, true);
}

} // namespace sparse
} // namespace vkt