1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file  vktSparseResourcesImageSparseResidency.cpp
21  * \brief Sparse partially resident images tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSparseResourcesBufferSparseBinding.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkImageUtil.hpp"
38 #include "vkQueryUtil.hpp"
39 #include "vkTypeUtil.hpp"
40 #include "vkCmdUtil.hpp"
41 #include "vkObjUtil.hpp"
42 #include "tcuTestLog.hpp"
43 
44 #include "deMath.h"
45 #include "deUniquePtr.hpp"
46 #include "deStringUtil.hpp"
47 
48 #include "tcuTextureUtil.hpp"
49 #include "tcuTexVerifierUtil.hpp"
50 
51 #include <string>
52 #include <vector>
53 #include <sstream>
54 
55 using namespace vk;
56 
57 namespace vkt
58 {
59 namespace sparse
60 {
61 namespace
62 {
63 
// Builds the parenthesized "(r, g, b, a)" component-value expression used in
// the generated shader's imageStore() call. Channels present on the plane take
// their per-channel expression from formatValueStrings; absent channels are "0".
std::string getFormatValueString(const std::vector<std::pair<uint32_t, uint32_t>> &channelsOnPlane,
                                 const std::vector<std::string> &formatValueStrings)
{
    // Start with every component zeroed, then overwrite those the plane provides.
    std::vector<std::string> components(4, "0");

    for (const auto &channel : channelsOnPlane)
        components[channel.first] = formatValueStrings[channel.first];

    std::ostringstream joined;
    joined << "(";
    for (size_t compNdx = 0; compNdx < components.size(); ++compNdx)
    {
        if (compNdx > 0)
            joined << ", ";
        joined << components[compNdx];
    }
    joined << ")";
    return joined.str();
}
84 
getCoordStr(const ImageType imageType,const std::string & x,const std::string & y,const std::string & z)85 const std::string getCoordStr(const ImageType imageType, const std::string &x, const std::string &y,
86                               const std::string &z)
87 {
88     switch (imageType)
89     {
90     case IMAGE_TYPE_1D:
91     case IMAGE_TYPE_BUFFER:
92         return x;
93 
94     case IMAGE_TYPE_1D_ARRAY:
95     case IMAGE_TYPE_2D:
96         return "ivec2(" + x + "," + y + ")";
97 
98     case IMAGE_TYPE_2D_ARRAY:
99     case IMAGE_TYPE_3D:
100     case IMAGE_TYPE_CUBE:
101     case IMAGE_TYPE_CUBE_ARRAY:
102         return "ivec3(" + x + "," + y + "," + z + ")";
103 
104     default:
105         DE_ASSERT(false);
106         return "";
107     }
108 }
109 
computeWorkGroupSize(const VkExtent3D & planeExtent)110 tcu::UVec3 computeWorkGroupSize(const VkExtent3D &planeExtent)
111 {
112     const uint32_t maxComputeWorkGroupInvocations = 128u;
113     const tcu::UVec3 maxComputeWorkGroupSize      = tcu::UVec3(128u, 128u, 64u);
114 
115     const uint32_t xWorkGroupSize =
116         std::min(std::min(planeExtent.width, maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
117     const uint32_t yWorkGroupSize = std::min(std::min(planeExtent.height, maxComputeWorkGroupSize.y()),
118                                              maxComputeWorkGroupInvocations / xWorkGroupSize);
119     const uint32_t zWorkGroupSize = std::min(std::min(planeExtent.depth, maxComputeWorkGroupSize.z()),
120                                              maxComputeWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));
121 
122     return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
123 }
124 
// Test case that writes to a sparse partially-resident image from a compute
// shader; one shader variant is generated per plane of the tested format.
class ImageSparseResidencyCase : public TestCase
{
public:
    ImageSparseResidencyCase(tcu::TestContext &testCtx, const std::string &name, const ImageType imageType,
                             const tcu::UVec3 &imageSize, const VkFormat format, const glu::GLSLVersion glslVersion,
                             const bool useDeviceGroups);

    // Adds one compute shader per plane, named "comp<planeNdx>".
    void initPrograms(SourceCollections &sourceCollections) const;
    // Throws NotSupportedError when the device lacks the required sparse,
    // storage-image, or format-specific support.
    virtual void checkSupport(Context &context) const;
    TestInstance *createInstance(Context &context) const;

private:
    const bool m_useDeviceGroups; // Exercise the device-group (multi-GPU) binding path when true.
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const VkFormat m_format;
    const glu::GLSLVersion m_glslVersion;
};
143 
// Constructor only records the test parameters; all real work happens in
// initPrograms(), checkSupport() and the test instance.
ImageSparseResidencyCase::ImageSparseResidencyCase(tcu::TestContext &testCtx, const std::string &name,
                                                   const ImageType imageType, const tcu::UVec3 &imageSize,
                                                   const VkFormat format, const glu::GLSLVersion glslVersion,
                                                   const bool useDeviceGroups)
    : TestCase(testCtx, name)
    , m_useDeviceGroups(useDeviceGroups)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_glslVersion(glslVersion)
{
}
156 
// Generates one compute shader per plane of m_format. Each shader fills its
// plane with per-channel values derived from gl_GlobalInvocationID so the
// instance can later verify which texels are resident.
void ImageSparseResidencyCase::initPrograms(SourceCollections &sourceCollections) const
{
    // Create compute program
    const char *const versionDecl                   = glu::getGLSLVersionDeclaration(m_glslVersion);
    const PlanarFormatDescription formatDescription = getPlanarFormatDescription(m_format);
    const std::string imageTypeStr                  = getShaderImageType(formatDescription, m_imageType);
    const std::string formatDataStr                 = getShaderImageDataType(formatDescription);
    const tcu::UVec3 shaderGridSize                 = getShaderGridSize(m_imageType, m_imageSize);
    const auto isAlphaOnly                          = isAlphaOnlyFormat(m_format);

    // Pick per-channel value expressions based on the channel class. For
    // alpha-only formats channel 3 (alpha) is inspected instead of channel 0.
    std::vector<std::string> formatValueStrings;
    switch (formatDescription.channels[isAlphaOnly ? 3 : 0].type)
    {
    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        // Integer formats: raw invocation IDs modulo 127 (fits all tested bit depths).
        formatValueStrings = {"int(gl_GlobalInvocationID.x) % 127", "int(gl_GlobalInvocationID.y) % 127",
                              "int(gl_GlobalInvocationID.z) % 127", "1"};
        break;
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        // For A8_UNORM, exchange the red and alpha channels.
        formatValueStrings = {
            (isAlphaOnly ? "1.0" : "float(int(gl_GlobalInvocationID.x) % 127) / 127.0"),
            "float(int(gl_GlobalInvocationID.y) % 127) / 127.0",
            "float(int(gl_GlobalInvocationID.z) % 127) / 127.0",
            (isAlphaOnly ? "float(int(gl_GlobalInvocationID.x) % 127) / 127.0" : "1.0"),
        };
        break;
    default:
        DE_ASSERT(false);
        break;
    }

    // Emit one shader ("comp<planeNdx>") per plane of the format.
    for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
    {
        // The plane may have to be written through a different but compatible
        // format (e.g. for multi-planar YCbCr formats).
        VkFormat planeCompatibleFormat = getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
        vk::PlanarFormatDescription compatibleFormatDescription =
            (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ?
                getPlanarFormatDescription(planeCompatibleFormat) :
                formatDescription;
        // Grid size in compressed-block units (depth is never block-compressed).
        VkExtent3D compatibleShaderGridSize{shaderGridSize.x() / formatDescription.blockWidth,
                                            shaderGridSize.y() / formatDescription.blockHeight,
                                            shaderGridSize.z() / 1u};

        // Collect (channelNdx, offsetBits) for the channels stored on this plane.
        std::vector<std::pair<uint32_t, uint32_t>> channelsOnPlane;
        for (uint32_t channelNdx = 0; channelNdx < 4; ++channelNdx)
        {
            if (!formatDescription.hasChannelNdx(channelNdx))
                continue;
            if (formatDescription.channels[channelNdx].planeNdx != planeNdx)
                continue;
            channelsOnPlane.push_back({channelNdx, formatDescription.channels[channelNdx].offsetBits});
        }
        // reorder channels for multi-planar images
        if (formatDescription.numPlanes > 1)
            std::sort(begin(channelsOnPlane), end(channelsOnPlane),
                      [](const std::pair<uint32_t, uint32_t> &lhs, const std::pair<uint32_t, uint32_t> &rhs)
                      { return lhs.second < rhs.second; });
        std::string formatValueStr = getFormatValueString(channelsOnPlane, formatValueStrings);
        VkExtent3D shaderExtent    = getPlaneExtent(compatibleFormatDescription, compatibleShaderGridSize, planeNdx, 0);
        // Alpha-only formats are written without a format qualifier (requires
        // GL_EXT_shader_image_load_formatted, see below).
        const std::string formatQualifierStr =
            (isAlphaOnly ? "" : ", " + getShaderImageFormatQualifier(planeCompatibleFormat));
        const tcu::UVec3 workGroupSize = computeWorkGroupSize(shaderExtent);

        std::ostringstream src;
        src << versionDecl << "\n";
        if (formatIsR64(m_format))
        {
            // 64-bit image formats need explicit int64 arithmetic and image types.
            src << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                << "#extension GL_EXT_shader_image_int64 : require\n";
        }
        if (isAlphaOnly)
        {
            src << "#extension GL_EXT_shader_image_load_formatted : require\n";
        }
        // Guard against out-of-range invocations: the workgroup grid may
        // overshoot the plane extent.
        src << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y()
            << ", local_size_z = " << workGroupSize.z() << ") in; \n"
            << "layout (binding = 0" << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr
            << " u_image;\n"
            << "void main (void)\n"
            << "{\n"
            << "    if( gl_GlobalInvocationID.x < " << shaderExtent.width << " ) \n"
            << "    if( gl_GlobalInvocationID.y < " << shaderExtent.height << " ) \n"
            << "    if( gl_GlobalInvocationID.z < " << shaderExtent.depth << " ) \n"
            << "    {\n"
            << "        imageStore(u_image, "
            << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z")
            << "," << formatDataStr << formatValueStr << ");\n"
            << "    }\n"
            << "}\n";
        std::ostringstream shaderName;
        shaderName << "comp" << planeNdx;
        sourceCollections.glslSources.add(shaderName.str())
            << glu::ComputeSource(src.str())
            << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3,
                                      vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS);
    }
}
256 
// Verifies the device can run this case; throws NotSupportedError otherwise.
// Checks, in order: A8_UNORM prerequisites (non-SC builds), image-size limits,
// sparse-residency support for the image type, storage-image support for the
// format, and int64 image-atomic features for R64 formats.
void ImageSparseResidencyCase::checkSupport(Context &context) const
{
    const InstanceInterface &instance     = context.getInstanceInterface();
    const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();

#ifndef CTS_USES_VULKANSC
    if (m_format == VK_FORMAT_A8_UNORM_KHR)
    {
        // A8_UNORM is written without a format qualifier, so the device must
        // support storage writes without format.
        context.requireDeviceFunctionality("VK_KHR_maintenance5");
        const auto properties = context.getFormatProperties(m_format);
        if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT_KHR) == 0u)
            TCU_THROW(NotSupportedError, "Format does not support writes without format");
    }
#endif // CTS_USES_VULKANSC

    // Check if image size does not exceed device limits
    if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
        TCU_THROW(NotSupportedError, "Image size not supported for device");

    // Check if device supports sparse operations for image type
    if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
        TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");

    //Check if image format supports storage images
    const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(instance, physicalDevice, m_format);
    if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) == 0)
        TCU_THROW(NotSupportedError, "Storage images are not supported for this format");

    if (formatIsR64(m_format))
    {
        // 64-bit image access requires both the extension and its feature bits,
        // including the sparse-image variant.
        context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

        if (context.getShaderImageAtomicInt64FeaturesEXT().shaderImageInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "shaderImageInt64Atomics is not supported");
        }

        if (context.getShaderImageAtomicInt64FeaturesEXT().sparseImageInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "sparseImageInt64Atomics is not supported for device");
        }
    }
}
300 
// Test instance: binds device memory to part of a sparse image, writes it from
// a compute shader and validates the result (see iterate()).
class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
    ImageSparseResidencyInstance(Context &context, const ImageType imageType, const tcu::UVec3 &imageSize,
                                 const VkFormat format, const bool useDeviceGroups);

    tcu::TestStatus iterate(void);

private:
    const bool m_useDeviceGroups; // Run the binding through device-group semantics when true.
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const VkFormat m_format;
};
315 
// Constructor only forwards the device-group flag to the base class and stores
// the remaining parameters; device and resources are created in iterate().
ImageSparseResidencyInstance::ImageSparseResidencyInstance(Context &context, const ImageType imageType,
                                                           const tcu::UVec3 &imageSize, const VkFormat format,
                                                           const bool useDeviceGroups)
    : SparseResourcesBaseInstance(context, useDeviceGroups)
    , m_useDeviceGroups(useDeviceGroups)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
{
}
326 
iterate(void)327 tcu::TestStatus ImageSparseResidencyInstance::iterate(void)
328 {
329     const auto isAlphaOnly            = isAlphaOnlyFormat(m_format);
330     const float epsilon               = 1e-5f;
331     const InstanceInterface &instance = m_context.getInstanceInterface();
332 
333     {
334         // Create logical device supporting both sparse and compute queues
335         QueueRequirementsVec queueRequirements;
336         queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
337         queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
338 
339         createDeviceSupportingQueues(queueRequirements, formatIsR64(m_format), isAlphaOnly);
340     }
341 
342     VkImageCreateInfo imageCreateInfo;
343     std::vector<DeviceMemorySp> deviceMemUniquePtrVec;
344 
345     const DeviceInterface &deviceInterface          = getDeviceInterface();
346     const Queue &sparseQueue                        = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
347     const Queue &computeQueue                       = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
348     const PlanarFormatDescription formatDescription = getPlanarFormatDescription(m_format);
349 
350     // Go through all physical devices
351     for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
352     {
353         const uint32_t firstDeviceID  = physDevID;
354         const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;
355 
356         const VkPhysicalDevice physicalDevice = getPhysicalDevice(firstDeviceID);
357         const VkPhysicalDeviceProperties physicalDeviceProperties =
358             getPhysicalDeviceProperties(instance, physicalDevice);
359 
360         imageCreateInfo.sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
361         imageCreateInfo.pNext         = DE_NULL;
362         imageCreateInfo.flags         = VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
363         imageCreateInfo.imageType     = mapImageType(m_imageType);
364         imageCreateInfo.format        = m_format;
365         imageCreateInfo.extent        = makeExtent3D(getLayerSize(m_imageType, m_imageSize));
366         imageCreateInfo.mipLevels     = 1u;
367         imageCreateInfo.arrayLayers   = getNumLayers(m_imageType, m_imageSize);
368         imageCreateInfo.samples       = VK_SAMPLE_COUNT_1_BIT;
369         imageCreateInfo.tiling        = VK_IMAGE_TILING_OPTIMAL;
370         imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
371         imageCreateInfo.usage         = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
372         imageCreateInfo.sharingMode   = VK_SHARING_MODE_EXCLUSIVE;
373         imageCreateInfo.queueFamilyIndexCount = 0u;
374         imageCreateInfo.pQueueFamilyIndices   = DE_NULL;
375 
376         if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
377         {
378             imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
379         }
380 
381         // check if we need to create VkImageView with different VkFormat than VkImage format
382         VkFormat planeCompatibleFormat0 = getPlaneCompatibleFormatForWriting(formatDescription, 0);
383         if (planeCompatibleFormat0 != getPlaneCompatibleFormat(formatDescription, 0))
384         {
385             imageCreateInfo.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
386         }
387 
388         // Check if device supports sparse operations for image format
389         if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
390             TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
391 
392         // Create sparse image
393         const Unique<VkImage> imageSparse(createImage(deviceInterface, getDevice(), &imageCreateInfo));
394 
395         // Create sparse image memory bind semaphore
396         const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
397 
398         std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements;
399 
400         {
401             // Get image general memory requirements
402             const VkMemoryRequirements imageMemoryRequirements =
403                 getImageMemoryRequirements(deviceInterface, getDevice(), *imageSparse);
404 
405             if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
406                 TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
407 
408             DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
409 
410             const uint32_t memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID),
411                                                                imageMemoryRequirements, MemoryRequirement::Any);
412 
413             if (memoryType == NO_MATCH_FOUND)
414                 return tcu::TestStatus::fail("No matching memory type found");
415 
416             if (firstDeviceID != secondDeviceID)
417             {
418                 VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
419                 const uint32_t heapIndex =
420                     getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
421                 deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID,
422                                                                  &peerMemoryFeatureFlags);
423 
424                 if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT) == 0) ||
425                     ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
426                 {
427                     TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
428                 }
429             }
430 
431             // Get sparse image sparse memory requirements
432             sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *imageSparse);
433             DE_ASSERT(sparseMemoryRequirements.size() != 0);
434 
435             const uint32_t metadataAspectIndex =
436                 getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);
437 
438             std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
439             std::vector<VkSparseMemoryBind> imageMipTailMemoryBinds;
440 
441             // Bind device memory for each aspect
442             for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
443             {
444                 const VkImageAspectFlags aspect =
445                     (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
446                 const uint32_t aspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, aspect);
447 
448                 if (aspectIndex == NO_MATCH_FOUND)
449                     TCU_THROW(NotSupportedError, "Not supported image aspect");
450 
451                 VkSparseImageMemoryRequirements aspectRequirements = sparseMemoryRequirements[aspectIndex];
452                 VkExtent3D imageGranularity = aspectRequirements.formatProperties.imageGranularity;
453 
454                 for (uint32_t layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
455                 {
456                     for (uint32_t mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
457                     {
458                         const VkImageSubresource subresource = {aspect, mipLevelNdx, layerNdx};
459                         const VkExtent3D planeExtent =
460                             getPlaneExtent(formatDescription, imageCreateInfo.extent, planeNdx, mipLevelNdx);
461                         const tcu::UVec3 numSparseBinds  = alignedDivide(planeExtent, imageGranularity);
462                         const tcu::UVec3 lastBlockExtent = tcu::UVec3(
463                             planeExtent.width % imageGranularity.width ? planeExtent.width % imageGranularity.width :
464                                                                          imageGranularity.width,
465                             planeExtent.height % imageGranularity.height ?
466                                 planeExtent.height % imageGranularity.height :
467                                 imageGranularity.height,
468                             planeExtent.depth % imageGranularity.depth ? planeExtent.depth % imageGranularity.depth :
469                                                                          imageGranularity.depth);
470 
471                         for (uint32_t z = 0; z < numSparseBinds.z(); ++z)
472                             for (uint32_t y = 0; y < numSparseBinds.y(); ++y)
473                                 for (uint32_t x = 0; x < numSparseBinds.x(); ++x)
474                                 {
475                                     const uint32_t linearIndex =
476                                         x + y * numSparseBinds.x() + z * numSparseBinds.x() * numSparseBinds.y() +
477                                         layerNdx * numSparseBinds.x() * numSparseBinds.y() * numSparseBinds.z();
478 
479                                     if (linearIndex % 2u == 0u)
480                                     {
481                                         VkOffset3D offset;
482                                         offset.x = x * imageGranularity.width;
483                                         offset.y = y * imageGranularity.height;
484                                         offset.z = z * imageGranularity.depth;
485 
486                                         VkExtent3D extent;
487                                         extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() :
488                                                                                         imageGranularity.width;
489                                         extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() :
490                                                                                         imageGranularity.height;
491                                         extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() :
492                                                                                         imageGranularity.depth;
493 
494                                         const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(
495                                             deviceInterface, getDevice(), imageMemoryRequirements.alignment, memoryType,
496                                             subresource, offset, extent);
497 
498                                         deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(
499                                             check<VkDeviceMemory>(imageMemoryBind.memory),
500                                             Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
501 
502                                         imageResidencyMemoryBinds.push_back(imageMemoryBind);
503                                     }
504                                 }
505                     }
506 
507                     if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) &&
508                         aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
509                     {
510                         const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(
511                             deviceInterface, getDevice(), aspectRequirements.imageMipTailSize, memoryType,
512                             aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
513 
514                         deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
515                             Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory),
516                                                  Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
517 
518                         imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
519                     }
520 
521                     // Metadata
522                     if (metadataAspectIndex != NO_MATCH_FOUND)
523                     {
524                         const VkSparseImageMemoryRequirements metadataAspectRequirements =
525                             sparseMemoryRequirements[metadataAspectIndex];
526 
527                         if (!(metadataAspectRequirements.formatProperties.flags &
528                               VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
529                         {
530                             const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(
531                                 deviceInterface, getDevice(), metadataAspectRequirements.imageMipTailSize, memoryType,
532                                 metadataAspectRequirements.imageMipTailOffset +
533                                     layerNdx * metadataAspectRequirements.imageMipTailStride,
534                                 VK_SPARSE_MEMORY_BIND_METADATA_BIT);
535 
536                             deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
537                                 Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory),
538                                                      Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
539 
540                             imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
541                         }
542                     }
543                 }
544 
545                 if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) &&
546                     aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
547                 {
548                     const VkSparseMemoryBind imageMipTailMemoryBind =
549                         makeSparseMemoryBind(deviceInterface, getDevice(), aspectRequirements.imageMipTailSize,
550                                              memoryType, aspectRequirements.imageMipTailOffset);
551 
552                     deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
553                         Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory),
554                                              Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
555 
556                     imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
557                 }
558             }
559 
560             // Metadata
561             if (metadataAspectIndex != NO_MATCH_FOUND)
562             {
563                 const VkSparseImageMemoryRequirements metadataAspectRequirements =
564                     sparseMemoryRequirements[metadataAspectIndex];
565 
566                 if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
567                 {
568                     const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(
569                         deviceInterface, getDevice(), metadataAspectRequirements.imageMipTailSize, memoryType,
570                         metadataAspectRequirements.imageMipTailOffset, VK_SPARSE_MEMORY_BIND_METADATA_BIT);
571 
572                     deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
573                         Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory),
574                                              Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
575 
576                     imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
577                 }
578             }
579 
580             const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
581                 VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, //VkStructureType sType;
582                 DE_NULL,                                         //const void* pNext;
583                 firstDeviceID,                                   //uint32_t resourceDeviceIndex;
584                 secondDeviceID,                                  //uint32_t memoryDeviceIndex;
585             };
586 
587             VkBindSparseInfo bindSparseInfo = {
588                 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                    //VkStructureType sType;
589                 m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL, //const void* pNext;
590                 0u,                                                    //uint32_t waitSemaphoreCount;
591                 DE_NULL,                                               //const VkSemaphore* pWaitSemaphores;
592                 0u,                                                    //uint32_t bufferBindCount;
593                 DE_NULL,                        //const VkSparseBufferMemoryBindInfo* pBufferBinds;
594                 0u,                             //uint32_t imageOpaqueBindCount;
595                 DE_NULL,                        //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
596                 0u,                             //uint32_t imageBindCount;
597                 DE_NULL,                        //const VkSparseImageMemoryBindInfo* pImageBinds;
598                 1u,                             //uint32_t signalSemaphoreCount;
599                 &imageMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
600             };
601 
602             VkSparseImageMemoryBindInfo imageResidencyBindInfo;
603             VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;
604 
605             if (imageResidencyMemoryBinds.size() > 0)
606             {
607                 imageResidencyBindInfo.image     = *imageSparse;
608                 imageResidencyBindInfo.bindCount = static_cast<uint32_t>(imageResidencyMemoryBinds.size());
609                 imageResidencyBindInfo.pBinds    = imageResidencyMemoryBinds.data();
610 
611                 bindSparseInfo.imageBindCount = 1u;
612                 bindSparseInfo.pImageBinds    = &imageResidencyBindInfo;
613             }
614 
615             if (imageMipTailMemoryBinds.size() > 0)
616             {
617                 imageMipTailBindInfo.image     = *imageSparse;
618                 imageMipTailBindInfo.bindCount = static_cast<uint32_t>(imageMipTailMemoryBinds.size());
619                 imageMipTailBindInfo.pBinds    = imageMipTailMemoryBinds.data();
620 
621                 bindSparseInfo.imageOpaqueBindCount = 1u;
622                 bindSparseInfo.pImageOpaqueBinds    = &imageMipTailBindInfo;
623             }
624 
625             // Submit sparse bind commands for execution
626             VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
627         }
628 
629         // Create command buffer for compute and transfer operations
630         const Unique<VkCommandPool> commandPool(
631             makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
632         const Unique<VkCommandBuffer> commandBuffer(
633             allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
634 
635         // Start recording commands
636         beginCommandBuffer(deviceInterface, *commandBuffer);
637 
638         // Create descriptor set layout
639         const Unique<VkDescriptorSetLayout> descriptorSetLayout(
640             DescriptorSetLayoutBuilder()
641                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
642                 .build(deviceInterface, getDevice()));
643 
644         // Create and bind descriptor set
645         const Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
646                                                           .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
647                                                           .build(deviceInterface, getDevice(),
648                                                                  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
649                                                                  vk::PlanarFormatDescription::MAX_PLANES));
650 
651         const Unique<VkPipelineLayout> pipelineLayout(
652             makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
653         std::vector<de::SharedPtr<vk::Unique<vk::VkShaderModule>>> shaderModules;
654         std::vector<de::SharedPtr<vk::Unique<vk::VkPipeline>>> computePipelines;
655         std::vector<de::SharedPtr<vk::Unique<vk::VkDescriptorSet>>> descriptorSets;
656         std::vector<de::SharedPtr<vk::Unique<vk::VkImageView>>> imageViews;
657 
658         const tcu::UVec3 shaderGridSize = getShaderGridSize(m_imageType, m_imageSize);
659 
660         // Run compute shader for each image plane
661         for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
662         {
663             const VkImageAspectFlags aspect =
664                 (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
665             const VkImageSubresourceRange subresourceRange =
666                 makeImageSubresourceRange(aspect, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
667             VkFormat planeCompatibleFormat = getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
668             vk::PlanarFormatDescription compatibleFormatDescription =
669                 (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ?
670                     getPlanarFormatDescription(planeCompatibleFormat) :
671                     formatDescription;
672             const tcu::UVec3 compatibleShaderGridSize(shaderGridSize.x() / formatDescription.blockWidth,
673                                                       shaderGridSize.y() / formatDescription.blockHeight,
674                                                       shaderGridSize.z() / 1u);
675             VkExtent3D shaderExtent = getPlaneExtent(
676                 compatibleFormatDescription,
677                 VkExtent3D{compatibleShaderGridSize.x(), compatibleShaderGridSize.y(), compatibleShaderGridSize.z()},
678                 planeNdx, 0u);
679 
680             // Create and bind compute pipeline
681             std::ostringstream shaderName;
682             shaderName << "comp" << planeNdx;
683             auto shaderModule = makeVkSharedPtr(createShaderModule(
684                 deviceInterface, getDevice(), m_context.getBinaryCollection().get(shaderName.str()), DE_NULL));
685             shaderModules.push_back(shaderModule);
686             auto computePipeline = makeVkSharedPtr(
687                 makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, shaderModule->get()));
688             computePipelines.push_back(computePipeline);
689             deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline->get());
690 
691             auto descriptorSet =
692                 makeVkSharedPtr(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
693             descriptorSets.push_back(descriptorSet);
694 
695             auto imageView =
696                 makeVkSharedPtr(makeImageView(deviceInterface, getDevice(), *imageSparse, mapImageViewType(m_imageType),
697                                               planeCompatibleFormat, subresourceRange));
698             imageViews.push_back(imageView);
699             const VkDescriptorImageInfo imageSparseInfo =
700                 makeDescriptorImageInfo(DE_NULL, imageView->get(), VK_IMAGE_LAYOUT_GENERAL);
701 
702             DescriptorSetUpdateBuilder()
703                 .writeSingle(descriptorSet->get(), DescriptorSetUpdateBuilder::Location::binding(0u),
704                              VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageSparseInfo)
705                 .update(deviceInterface, getDevice());
706 
707             deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u,
708                                                   1u, &descriptorSet->get(), 0u, DE_NULL);
709 
710             {
711                 const VkImageMemoryBarrier imageSparseLayoutChangeBarrier = makeImageMemoryBarrier(
712                     0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *imageSparse,
713                     subresourceRange,
714                     sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex :
715                                                                                     VK_QUEUE_FAMILY_IGNORED,
716                     sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex :
717                                                                                     VK_QUEUE_FAMILY_IGNORED);
718 
719                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
720                                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL,
721                                                    1u, &imageSparseLayoutChangeBarrier);
722             }
723 
724             {
725                 const tcu::UVec3 workGroupSize = computeWorkGroupSize(shaderExtent);
726 
727                 const uint32_t xWorkGroupCount =
728                     shaderExtent.width / workGroupSize.x() + (shaderExtent.width % workGroupSize.x() ? 1u : 0u);
729                 const uint32_t yWorkGroupCount =
730                     shaderExtent.height / workGroupSize.y() + (shaderExtent.height % workGroupSize.y() ? 1u : 0u);
731                 const uint32_t zWorkGroupCount =
732                     shaderExtent.depth / workGroupSize.z() + (shaderExtent.depth % workGroupSize.z() ? 1u : 0u);
733 
734                 const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
735 
736                 if (maxComputeWorkGroupCount.x() < xWorkGroupCount || maxComputeWorkGroupCount.y() < yWorkGroupCount ||
737                     maxComputeWorkGroupCount.z() < zWorkGroupCount)
738                 {
739                     TCU_THROW(NotSupportedError, "Image size is not supported");
740                 }
741 
742                 deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
743             }
744 
745             {
746                 const VkImageMemoryBarrier imageSparseTransferBarrier = makeImageMemoryBarrier(
747                     VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
748                     VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *imageSparse, subresourceRange);
749 
750                 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
751                                                    VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u,
752                                                    &imageSparseTransferBarrier);
753             }
754         }
755 
756         uint32_t imageSizeInBytes = 0;
757         uint32_t planeOffsets[PlanarFormatDescription::MAX_PLANES];
758         uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
759 
760         for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
761         {
762             planeOffsets[planeNdx] = imageSizeInBytes;
763             const uint32_t planeW  = imageCreateInfo.extent.width /
764                                     (formatDescription.blockWidth * formatDescription.planes[planeNdx].widthDivisor);
765             planeRowPitches[planeNdx] = formatDescription.planes[planeNdx].elementSizeBytes * planeW;
766             imageSizeInBytes +=
767                 getImageMipLevelSizeInBytes(imageCreateInfo.extent, imageCreateInfo.arrayLayers, formatDescription,
768                                             planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
769         }
770 
771         const VkBufferCreateInfo outputBufferCreateInfo =
772             makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
773         const Unique<VkBuffer> outputBuffer(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
774         const de::UniquePtr<Allocation> outputBufferAlloc(
775             bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
776         std::vector<VkBufferImageCopy> bufferImageCopy(formatDescription.numPlanes);
777 
778         for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
779         {
780             const VkImageAspectFlags aspect =
781                 (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
782 
783             bufferImageCopy[planeNdx] = {
784                 planeOffsets[planeNdx], // VkDeviceSize bufferOffset;
785                 0u,                     // uint32_t bufferRowLength;
786                 0u,                     // uint32_t bufferImageHeight;
787                 makeImageSubresourceLayers(aspect, 0u, 0u,
788                                            imageCreateInfo.arrayLayers), // VkImageSubresourceLayers imageSubresource;
789                 makeOffset3D(0, 0, 0),                                   // VkOffset3D imageOffset;
790                 vk::getPlaneExtent(formatDescription, imageCreateInfo.extent, planeNdx,
791                                    0) // VkExtent3D imageExtent;
792             };
793         }
794         deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *imageSparse, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
795                                              *outputBuffer, static_cast<uint32_t>(bufferImageCopy.size()),
796                                              bufferImageCopy.data());
797 
798         {
799             const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier(
800                 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0u, imageSizeInBytes);
801 
802             deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
803                                                VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u,
804                                                &outputBufferHostReadBarrier, 0u, DE_NULL);
805         }
806 
807         // End recording commands
808         endCommandBuffer(deviceInterface, *commandBuffer);
809 
810         // The stage at which execution is going to wait for finish of sparse binding operations
811         const VkPipelineStageFlags stageBits[] = {VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT};
812 
813         // Submit commands for execution and wait for completion
814         submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u,
815                               &imageMemoryBindSemaphore.get(), stageBits, 0, DE_NULL, m_useDeviceGroups, firstDeviceID);
816 
817         // Retrieve data from buffer to host memory
818         invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);
819         uint8_t *outputData = static_cast<uint8_t *>(outputBufferAlloc->getHostPtr());
820         void *planePointers[PlanarFormatDescription::MAX_PLANES];
821 
822         for (uint32_t planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
823             planePointers[planeNdx] = outputData + static_cast<size_t>(planeOffsets[planeNdx]);
824 
825         // Wait for sparse queue to become idle
826         //vsk fails:
827         deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
828 
829         // write result images to log file
830         for (uint32_t channelNdx = 0; channelNdx < 4; ++channelNdx)
831         {
832             if (!formatDescription.hasChannelNdx(channelNdx))
833                 continue;
834             uint32_t planeNdx                  = formatDescription.channels[channelNdx].planeNdx;
835             vk::VkFormat planeCompatibleFormat = getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
836             vk::PlanarFormatDescription compatibleFormatDescription =
837                 (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ?
838                     getPlanarFormatDescription(planeCompatibleFormat) :
839                     formatDescription;
840             const tcu::UVec3 compatibleShaderGridSize(shaderGridSize.x() / formatDescription.blockWidth,
841                                                       shaderGridSize.y() / formatDescription.blockHeight,
842                                                       shaderGridSize.z() / 1u);
843             tcu::ConstPixelBufferAccess pixelBuffer =
844                 vk::getChannelAccess(compatibleFormatDescription, compatibleShaderGridSize, planeRowPitches,
845                                      (const void *const *)planePointers, channelNdx);
846             std::ostringstream str;
847             str << "image" << channelNdx;
848             m_context.getTestContext().getLog() << tcu::LogImage(str.str(), str.str(), pixelBuffer);
849         }
850 
851         // Validate results
852         for (uint32_t channelNdx = 0; channelNdx < 4; ++channelNdx)
853         {
854             if (!formatDescription.hasChannelNdx(channelNdx))
855                 continue;
856 
857             uint32_t planeNdx = formatDescription.channels[channelNdx].planeNdx;
858             const VkImageAspectFlags aspect =
859                 (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
860             const uint32_t aspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, aspect);
861 
862             if (aspectIndex == NO_MATCH_FOUND)
863                 TCU_THROW(NotSupportedError, "Not supported image aspect");
864 
865             VkSparseImageMemoryRequirements aspectRequirements = sparseMemoryRequirements[aspectIndex];
866 
867             vk::VkFormat planeCompatibleFormat = getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
868             vk::PlanarFormatDescription compatibleFormatDescription =
869                 (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ?
870                     getPlanarFormatDescription(planeCompatibleFormat) :
871                     formatDescription;
872             const tcu::UVec3 compatibleShaderGridSize(shaderGridSize.x() / formatDescription.blockWidth,
873                                                       shaderGridSize.y() / formatDescription.blockHeight,
874                                                       shaderGridSize.z() / 1u);
875             VkExtent3D compatibleImageSize{imageCreateInfo.extent.width / formatDescription.blockWidth,
876                                            imageCreateInfo.extent.height / formatDescription.blockHeight,
877                                            imageCreateInfo.extent.depth / 1u};
878             VkExtent3D compatibleImageGranularity{
879                 aspectRequirements.formatProperties.imageGranularity.width / formatDescription.blockWidth,
880                 aspectRequirements.formatProperties.imageGranularity.height / formatDescription.blockHeight,
881                 aspectRequirements.formatProperties.imageGranularity.depth / 1u};
882             tcu::ConstPixelBufferAccess pixelBuffer =
883                 vk::getChannelAccess(compatibleFormatDescription, compatibleShaderGridSize, planeRowPitches,
884                                      (const void *const *)planePointers, channelNdx);
885             VkExtent3D planeExtent  = getPlaneExtent(compatibleFormatDescription, compatibleImageSize, planeNdx, 0u);
886             tcu::IVec3 pixelDivider = pixelBuffer.getDivider();
887 
888             if (aspectRequirements.imageMipTailFirstLod > 0u)
889             {
890                 const tcu::UVec3 numSparseBinds = alignedDivide(planeExtent, compatibleImageGranularity);
891                 const tcu::UVec3 lastBlockExtent =
892                     tcu::UVec3(planeExtent.width % compatibleImageGranularity.width ?
893                                    planeExtent.width % compatibleImageGranularity.width :
894                                    compatibleImageGranularity.width,
895                                planeExtent.height % compatibleImageGranularity.height ?
896                                    planeExtent.height % compatibleImageGranularity.height :
897                                    compatibleImageGranularity.height,
898                                planeExtent.depth % compatibleImageGranularity.depth ?
899                                    planeExtent.depth % compatibleImageGranularity.depth :
900                                    compatibleImageGranularity.depth);
901 
902                 for (uint32_t layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
903                 {
904                     for (uint32_t z = 0; z < numSparseBinds.z(); ++z)
905                         for (uint32_t y = 0; y < numSparseBinds.y(); ++y)
906                             for (uint32_t x = 0; x < numSparseBinds.x(); ++x)
907                             {
908                                 VkExtent3D offset;
909                                 offset.width  = x * compatibleImageGranularity.width;
910                                 offset.height = y * compatibleImageGranularity.height;
911                                 offset.depth  = z * compatibleImageGranularity.depth +
912                                                layerNdx * numSparseBinds.z() * compatibleImageGranularity.depth;
913 
914                                 VkExtent3D extent;
915                                 extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() :
916                                                                                 compatibleImageGranularity.width;
917                                 extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() :
918                                                                                 compatibleImageGranularity.height;
919                                 extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() :
920                                                                                 compatibleImageGranularity.depth;
921 
922                                 const uint32_t linearIndex =
923                                     x + y * numSparseBinds.x() + z * numSparseBinds.x() * numSparseBinds.y() +
924                                     layerNdx * numSparseBinds.x() * numSparseBinds.y() * numSparseBinds.z();
925 
926                                 if (linearIndex % 2u == 0u)
927                                 {
928                                     for (uint32_t offsetZ = offset.depth; offsetZ < offset.depth + extent.depth;
929                                          ++offsetZ)
930                                         for (uint32_t offsetY = offset.height; offsetY < offset.height + extent.height;
931                                              ++offsetY)
932                                             for (uint32_t offsetX = offset.width; offsetX < offset.width + extent.width;
933                                                  ++offsetX)
934                                             {
935                                                 uint32_t iReferenceValue;
936                                                 float fReferenceValue;
937 
938                                                 switch (channelNdx)
939                                                 {
940                                                 case 0:
941                                                     iReferenceValue = offsetX % 127u;
942                                                     fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
943                                                     break;
944                                                 case 1:
945                                                     iReferenceValue = offsetY % 127u;
946                                                     fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
947                                                     break;
948                                                 case 2:
949                                                     iReferenceValue = offsetZ % 127u;
950                                                     fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
951                                                     break;
952                                                 case 3:
953                                                     // For A8_UNORM we use the same values as the normal red channel, as per the shader.
954                                                     iReferenceValue = (isAlphaOnly ? offsetX % 127u : 1u);
955                                                     fReferenceValue =
956                                                         (isAlphaOnly ? static_cast<float>(iReferenceValue) / 127.f :
957                                                                        1.f);
958                                                     break;
959                                                 default:
960                                                     DE_FATAL("Unexpected channel index");
961                                                     break;
962                                                 }
963 
964                                                 float acceptableError = epsilon;
965 
966                                                 switch (formatDescription.channels[channelNdx].type)
967                                                 {
968                                                 case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
969                                                 case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
970                                                 {
971                                                     const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(
972                                                         offsetX * pixelDivider.x(), offsetY * pixelDivider.y(),
973                                                         offsetZ * pixelDivider.z());
974 
975                                                     if (outputValue.x() != iReferenceValue)
976                                                         return tcu::TestStatus::fail("Failed");
977 
978                                                     break;
979                                                 }
980                                                 case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
981                                                 case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
982                                                 {
983                                                     float fixedPointError =
984                                                         tcu::TexVerifierUtil::computeFixedPointError(
985                                                             formatDescription.channels[channelNdx].sizeBits);
986                                                     acceptableError += fixedPointError;
987                                                     const tcu::Vec4 outputValue = pixelBuffer.getPixel(
988                                                         offsetX * pixelDivider.x(), offsetY * pixelDivider.y(),
989                                                         offsetZ * pixelDivider.z());
990 
991                                                     if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
992                                                         return tcu::TestStatus::fail("Failed");
993 
994                                                     break;
995                                                 }
996                                                 case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
997                                                 {
998                                                     const tcu::Vec4 outputValue = pixelBuffer.getPixel(
999                                                         offsetX * pixelDivider.x(), offsetY * pixelDivider.y(),
1000                                                         offsetZ * pixelDivider.z());
1001 
1002                                                     if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
1003                                                         return tcu::TestStatus::fail("Failed");
1004 
1005                                                     break;
1006                                                 }
1007                                                 default:
1008                                                     DE_FATAL("Unexpected channel type");
1009                                                     break;
1010                                                 }
1011                                             }
1012                                 }
1013                                 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
1014                                 {
1015                                     for (uint32_t offsetZ = offset.depth; offsetZ < offset.depth + extent.depth;
1016                                          ++offsetZ)
1017                                         for (uint32_t offsetY = offset.height; offsetY < offset.height + extent.height;
1018                                              ++offsetY)
1019                                             for (uint32_t offsetX = offset.width; offsetX < offset.width + extent.width;
1020                                                  ++offsetX)
1021                                             {
1022                                                 float acceptableError = epsilon;
1023 
1024                                                 switch (formatDescription.channels[channelNdx].type)
1025                                                 {
1026                                                 case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
1027                                                 case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
1028                                                 {
1029                                                     const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(
1030                                                         offsetX * pixelDivider.x(), offsetY * pixelDivider.y(),
1031                                                         offsetZ * pixelDivider.z());
1032 
1033                                                     if (outputValue.x() != 0u)
1034                                                         return tcu::TestStatus::fail("Failed");
1035 
1036                                                     break;
1037                                                 }
1038                                                 case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
1039                                                 case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
1040                                                 {
1041                                                     float fixedPointError =
1042                                                         tcu::TexVerifierUtil::computeFixedPointError(
1043                                                             formatDescription.channels[channelNdx].sizeBits);
1044                                                     acceptableError += fixedPointError;
1045                                                     const tcu::Vec4 outputValue = pixelBuffer.getPixel(
1046                                                         offsetX * pixelDivider.x(), offsetY * pixelDivider.y(),
1047                                                         offsetZ * pixelDivider.z());
1048 
1049                                                     if (deAbs(outputValue.x()) > acceptableError)
1050                                                         return tcu::TestStatus::fail("Failed");
1051 
1052                                                     break;
1053                                                 }
1054                                                 case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
1055                                                 {
1056                                                     const tcu::Vec4 outputValue = pixelBuffer.getPixel(
1057                                                         offsetX * pixelDivider.x(), offsetY * pixelDivider.y(),
1058                                                         offsetZ * pixelDivider.z());
1059 
1060                                                     if (deAbs(outputValue.x()) > acceptableError)
1061                                                         return tcu::TestStatus::fail("Failed");
1062 
1063                                                     break;
1064                                                 }
1065                                                 default:
1066                                                     DE_FATAL("Unexpected channel type");
1067                                                     break;
1068                                                 }
1069                                             }
1070                                 }
1071                             }
1072                 }
1073             }
1074             else
1075             {
1076                 for (uint32_t offsetZ = 0u; offsetZ < planeExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
1077                     for (uint32_t offsetY = 0u; offsetY < planeExtent.height; ++offsetY)
1078                         for (uint32_t offsetX = 0u; offsetX < planeExtent.width; ++offsetX)
1079                         {
1080                             uint32_t iReferenceValue;
1081                             float fReferenceValue;
1082                             switch (channelNdx)
1083                             {
1084                             case 0:
1085                                 iReferenceValue = offsetX % 127u;
1086                                 fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
1087                                 break;
1088                             case 1:
1089                                 iReferenceValue = offsetY % 127u;
1090                                 fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
1091                                 break;
1092                             case 2:
1093                                 iReferenceValue = offsetZ % 127u;
1094                                 fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
1095                                 break;
1096                             case 3:
1097                                 iReferenceValue = (isAlphaOnly ? offsetX % 127u : 1u);
1098                                 fReferenceValue = (isAlphaOnly ? static_cast<float>(iReferenceValue) / 127.f : 1.f);
1099                                 break;
1100                             default:
1101                                 DE_FATAL("Unexpected channel index");
1102                                 break;
1103                             }
1104                             float acceptableError = epsilon;
1105 
1106                             switch (formatDescription.channels[channelNdx].type)
1107                             {
1108                             case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
1109                             case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
1110                             {
1111                                 const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(
1112                                     offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());
1113 
1114                                 if (outputValue.x() != iReferenceValue)
1115                                     return tcu::TestStatus::fail("Failed");
1116 
1117                                 break;
1118                             }
1119                             case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
1120                             case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
1121                             {
1122                                 float fixedPointError = tcu::TexVerifierUtil::computeFixedPointError(
1123                                     formatDescription.channels[channelNdx].sizeBits);
1124                                 acceptableError += fixedPointError;
1125                                 const tcu::Vec4 outputValue = pixelBuffer.getPixel(
1126                                     offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());
1127 
1128                                 if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
1129                                     return tcu::TestStatus::fail("Failed");
1130 
1131                                 break;
1132                             }
1133                             case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
1134                             {
1135                                 const tcu::Vec4 outputValue = pixelBuffer.getPixel(
1136                                     offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());
1137 
1138                                 if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
1139                                     return tcu::TestStatus::fail("Failed");
1140 
1141                                 break;
1142                             }
1143                             default:
1144                                 DE_FATAL("Unexpected channel type");
1145                                 break;
1146                             }
1147                         }
1148             }
1149         }
1150     }
1151 
1152     return tcu::TestStatus::pass("Passed");
1153 }
1154 
createInstance(Context & context) const1155 TestInstance *ImageSparseResidencyCase::createInstance(Context &context) const
1156 {
1157     return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
1158 }
1159 
getSparseResidencyTestFormats(ImageType imageType,bool addExtraFormat)1160 std::vector<TestFormat> getSparseResidencyTestFormats(ImageType imageType, bool addExtraFormat)
1161 {
1162     auto formats = getTestFormats(imageType);
1163 #ifndef CTS_USES_VULKANSC
1164     if (addExtraFormat)
1165         formats.push_back(TestFormat{VK_FORMAT_A8_UNORM_KHR});
1166 #endif // CTS_USES_VULKANSC
1167     return formats;
1168 }
1169 
1170 } // namespace
1171 
createImageSparseResidencyTestsCommon(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> testGroup,const bool useDeviceGroup=false)1172 tcu::TestCaseGroup *createImageSparseResidencyTestsCommon(tcu::TestContext &testCtx,
1173                                                           de::MovePtr<tcu::TestCaseGroup> testGroup,
1174                                                           const bool useDeviceGroup = false)
1175 {
1176     const std::vector<TestImageParameters> imageParameters{
1177         {IMAGE_TYPE_2D,
1178          {tcu::UVec3(512u, 256u, 1u), tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u, 137u, 1u)},
1179          getSparseResidencyTestFormats(IMAGE_TYPE_2D, !useDeviceGroup)},
1180         {IMAGE_TYPE_2D_ARRAY,
1181          {tcu::UVec3(512u, 256u, 6u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u)},
1182          getSparseResidencyTestFormats(IMAGE_TYPE_2D_ARRAY, !useDeviceGroup)},
1183         {IMAGE_TYPE_CUBE,
1184          {tcu::UVec3(256u, 256u, 1u), tcu::UVec3(128u, 128u, 1u), tcu::UVec3(137u, 137u, 1u)},
1185          getSparseResidencyTestFormats(IMAGE_TYPE_CUBE, !useDeviceGroup)},
1186         {IMAGE_TYPE_CUBE_ARRAY,
1187          {tcu::UVec3(256u, 256u, 6u), tcu::UVec3(128u, 128u, 8u), tcu::UVec3(137u, 137u, 3u)},
1188          getSparseResidencyTestFormats(IMAGE_TYPE_CUBE_ARRAY, !useDeviceGroup)},
1189         {IMAGE_TYPE_3D,
1190          {tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u, 137u, 3u)},
1191          getSparseResidencyTestFormats(IMAGE_TYPE_3D, !useDeviceGroup)},
1192     };
1193 
1194     for (size_t imageTypeNdx = 0; imageTypeNdx < imageParameters.size(); ++imageTypeNdx)
1195     {
1196         const ImageType imageType = imageParameters[imageTypeNdx].imageType;
1197         de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(
1198             new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));
1199 
1200         for (size_t formatNdx = 0; formatNdx < imageParameters[imageTypeNdx].formats.size(); ++formatNdx)
1201         {
1202             const VkFormat format         = imageParameters[imageTypeNdx].formats[formatNdx].format;
1203             tcu::UVec3 imageSizeAlignment = getImageSizeAlignment(format);
1204             de::MovePtr<tcu::TestCaseGroup> formatGroup(
1205                 new tcu::TestCaseGroup(testCtx, getImageFormatID(format).c_str()));
1206 
1207             for (size_t imageSizeNdx = 0; imageSizeNdx < imageParameters[imageTypeNdx].imageSizes.size();
1208                  ++imageSizeNdx)
1209             {
1210                 const tcu::UVec3 imageSize = imageParameters[imageTypeNdx].imageSizes[imageSizeNdx];
1211 
1212                 // skip test for images with odd sizes for some YCbCr formats
1213                 if ((imageSize.x() % imageSizeAlignment.x()) != 0)
1214                     continue;
1215                 if ((imageSize.y() % imageSizeAlignment.y()) != 0)
1216                     continue;
1217 
1218                 std::ostringstream stream;
1219                 stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
1220 
1221                 formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), imageType, imageSize, format,
1222                                                                    glu::GLSL_VERSION_440, useDeviceGroup));
1223             }
1224             imageTypeGroup->addChild(formatGroup.release());
1225         }
1226         testGroup->addChild(imageTypeGroup.release());
1227     }
1228 
1229     return testGroup.release();
1230 }
1231 
createImageSparseResidencyTests(tcu::TestContext & testCtx)1232 tcu::TestCaseGroup *createImageSparseResidencyTests(tcu::TestContext &testCtx)
1233 {
1234     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency"));
1235     return createImageSparseResidencyTestsCommon(testCtx, testGroup);
1236 }
1237 
createDeviceGroupImageSparseResidencyTests(tcu::TestContext & testCtx)1238 tcu::TestCaseGroup *createDeviceGroupImageSparseResidencyTests(tcu::TestContext &testCtx)
1239 {
1240     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_residency"));
1241     return createImageSparseResidencyTestsCommon(testCtx, testGroup, true);
1242 }
1243 
1244 } // namespace sparse
1245 } // namespace vkt
1246