xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/ycbcr/vktYCbCrUtil.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Google Inc.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief YCbCr Test Utilities
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktYCbCrUtil.hpp"
26 
27 #include "vkQueryUtil.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkTypeUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 
32 #include "tcuTextureUtil.hpp"
33 #include "deMath.h"
34 #include "tcuFloat.hpp"
35 #include "tcuVector.hpp"
36 #include "tcuVectorUtil.hpp"
37 
38 #include "deSTLUtil.hpp"
39 #include "deUniquePtr.hpp"
40 
41 #include <limits>
42 
43 namespace vkt
44 {
45 namespace ycbcr
46 {
47 
48 using namespace vk;
49 
50 using de::MovePtr;
51 using std::string;
52 using std::vector;
53 using tcu::FloatFormat;
54 using tcu::Interval;
55 using tcu::IVec2;
56 using tcu::IVec4;
57 using tcu::UVec2;
58 using tcu::UVec4;
59 using tcu::Vec2;
60 using tcu::Vec4;
61 
62 // MultiPlaneImageData
63 
MultiPlaneImageData(VkFormat format,const UVec2 & size)64 MultiPlaneImageData::MultiPlaneImageData(VkFormat format, const UVec2 &size)
65     : m_format(format)
66     , m_description(getPlanarFormatDescription(format))
67     , m_size(size)
68 {
69     for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
70         m_planeData[planeNdx].resize(
71             getPlaneSizeInBytes(m_description, size, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY));
72 }
73 
//! Copy constructor: duplicates format, planar description, size and the stored bytes of each plane.
MultiPlaneImageData::MultiPlaneImageData(const MultiPlaneImageData &other)
    : m_format(other.m_format)
    , m_description(other.m_description)
    , m_size(other.m_size)
{
    const uint32_t planeCount = m_description.numPlanes;

    // Only the planes used by the format are copied.
    for (uint32_t plane = 0; plane != planeCount; plane++)
    {
        m_planeData[plane] = other.m_planeData[plane];
    }
}
82 
~MultiPlaneImageData(void)83 MultiPlaneImageData::~MultiPlaneImageData(void)
84 {
85 }
86 
getChannelAccess(uint32_t channelNdx)87 tcu::PixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx)
88 {
89     void *planePtrs[PlanarFormatDescription::MAX_PLANES];
90     uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
91 
92     for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
93     {
94         const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
95         planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
96         planePtrs[planeNdx]       = &m_planeData[planeNdx][0];
97     }
98 
99     return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
100 }
101 
getChannelAccess(uint32_t channelNdx) const102 tcu::ConstPixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx) const
103 {
104     const void *planePtrs[PlanarFormatDescription::MAX_PLANES];
105     uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
106 
107     for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
108     {
109         const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
110         planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
111         planePtrs[planeNdx]       = &m_planeData[planeNdx][0];
112     }
113 
114     return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
115 }
116 
117 // Misc utilities
118 
119 namespace
120 {
121 
allocateStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)122 void allocateStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
123                             const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
124                             vector<AllocationSp> *allocations)
125 {
126     for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
127     {
128         const VkBufferCreateInfo bufferInfo = {
129             VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
130             DE_NULL,
131             (VkBufferCreateFlags)0u,
132             (VkDeviceSize)imageData.getPlaneSize(planeNdx),
133             VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
134             VK_SHARING_MODE_EXCLUSIVE,
135             0u,
136             (const uint32_t *)DE_NULL,
137         };
138         Move<VkBuffer> buffer(createBuffer(vkd, device, &bufferInfo));
139         MovePtr<Allocation> allocation(allocator.allocate(getBufferMemoryRequirements(vkd, device, *buffer),
140                                                           MemoryRequirement::HostVisible | MemoryRequirement::Any));
141 
142         VK_CHECK(vkd.bindBufferMemory(device, *buffer, allocation->getMemory(), allocation->getOffset()));
143 
144         buffers->push_back(VkBufferSp(new Unique<VkBuffer>(buffer)));
145         allocations->push_back(AllocationSp(allocation.release()));
146     }
147 }
148 
allocateAndWriteStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)149 void allocateAndWriteStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
150                                     const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
151                                     vector<AllocationSp> *allocations)
152 {
153     allocateStagingBuffers(vkd, device, allocator, imageData, buffers, allocations);
154 
155     for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
156     {
157         deMemcpy((*allocations)[planeNdx]->getHostPtr(), imageData.getPlanePtr(planeNdx),
158                  imageData.getPlaneSize(planeNdx));
159         flushMappedMemoryRange(vkd, device, (*allocations)[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
160     }
161 }
162 
readStagingBuffers(MultiPlaneImageData * imageData,const DeviceInterface & vkd,VkDevice device,const vector<AllocationSp> & allocations)163 void readStagingBuffers(MultiPlaneImageData *imageData, const DeviceInterface &vkd, VkDevice device,
164                         const vector<AllocationSp> &allocations)
165 {
166     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
167     {
168         invalidateMappedMemoryRange(vkd, device, allocations[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
169         deMemcpy(imageData->getPlanePtr(planeNdx), allocations[planeNdx]->getHostPtr(),
170                  imageData->getPlaneSize(planeNdx));
171     }
172 }
173 
174 } // namespace
175 
checkImageSupport(Context & context,VkFormat format,VkImageCreateFlags createFlags,VkImageTiling tiling)176 void checkImageSupport(Context &context, VkFormat format, VkImageCreateFlags createFlags, VkImageTiling tiling)
177 {
178     const bool disjoint                                           = (createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0;
179     const VkPhysicalDeviceSamplerYcbcrConversionFeatures features = context.getSamplerYcbcrConversionFeatures();
180 
181     if (features.samplerYcbcrConversion == VK_FALSE)
182         TCU_THROW(NotSupportedError, "samplerYcbcrConversion is not supported");
183 
184     if (disjoint)
185     {
186         context.requireDeviceFunctionality("VK_KHR_bind_memory2");
187         context.requireDeviceFunctionality("VK_KHR_get_memory_requirements2");
188     }
189 
190     {
191         const VkFormatProperties formatProperties =
192             getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
193         const VkFormatFeatureFlags featureFlags = tiling == VK_IMAGE_TILING_OPTIMAL ?
194                                                       formatProperties.optimalTilingFeatures :
195                                                       formatProperties.linearTilingFeatures;
196 
197         if ((featureFlags &
198              (VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) == 0)
199             TCU_THROW(NotSupportedError, "YCbCr conversion is not supported for format");
200 
201         if (disjoint && ((featureFlags & VK_FORMAT_FEATURE_DISJOINT_BIT) == 0))
202             TCU_THROW(NotSupportedError, "Disjoint planes are not supported for format");
203     }
204 }
205 
fillRandomNoNaN(de::Random * randomGen,uint8_t * const data,uint32_t size,const vk::VkFormat format)206 void fillRandomNoNaN(de::Random *randomGen, uint8_t *const data, uint32_t size, const vk::VkFormat format)
207 {
208     bool isFloat    = false;
209     uint32_t stride = 1;
210 
211     switch (format)
212     {
213     case vk::VK_FORMAT_B10G11R11_UFLOAT_PACK32:
214         isFloat = true;
215         stride  = 1;
216         break;
217     case vk::VK_FORMAT_R16_SFLOAT:
218     case vk::VK_FORMAT_R16G16_SFLOAT:
219     case vk::VK_FORMAT_R16G16B16_SFLOAT:
220     case vk::VK_FORMAT_R16G16B16A16_SFLOAT:
221         isFloat = true;
222         stride  = 2;
223         break;
224     case vk::VK_FORMAT_R32_SFLOAT:
225     case vk::VK_FORMAT_R32G32_SFLOAT:
226     case vk::VK_FORMAT_R32G32B32_SFLOAT:
227     case vk::VK_FORMAT_R32G32B32A32_SFLOAT:
228         isFloat = true;
229         stride  = 4;
230         break;
231     case vk::VK_FORMAT_R64_SFLOAT:
232     case vk::VK_FORMAT_R64G64_SFLOAT:
233     case vk::VK_FORMAT_R64G64B64_SFLOAT:
234     case vk::VK_FORMAT_R64G64B64A64_SFLOAT:
235         isFloat = true;
236         stride  = 8;
237         break;
238     default:
239         stride = 1;
240         break;
241     }
242 
243     if (isFloat)
244     {
245         uint32_t ndx = 0;
246         for (; ndx < size - stride + 1; ndx += stride)
247         {
248             if (stride == 1)
249             {
250                 // Set first bit of each channel to 0 to avoid NaNs, only format is B10G11R11
251                 const uint8_t mask[] = {0x7F, 0xDF, 0xFB, 0xFF};
252                 // Apply mask for both endians
253                 data[ndx] = (randomGen->getUint8() & mask[ndx % 4]) & mask[3 - ndx % 4];
254             }
255             else if (stride == 2)
256             {
257                 tcu::float16_t *const ptr = reinterpret_cast<tcu::float16_t *>(&data[ndx]);
258                 *ptr                      = tcu::Float16(randomGen->getFloat()).bits();
259             }
260             else if (stride == 4)
261             {
262                 float *ptr = reinterpret_cast<float *>(&data[ndx]);
263                 *ptr       = randomGen->getFloat();
264             }
265             else if (stride == 8)
266             {
267                 double *ptr = reinterpret_cast<double *>(&data[ndx]);
268                 *ptr        = randomGen->getDouble();
269             }
270         }
271         while (ndx < size)
272         {
273             data[ndx] = 0;
274         }
275     }
276     else
277     {
278         for (uint32_t ndx = 0; ndx < size; ++ndx)
279         {
280             data[ndx] = randomGen->getUint8();
281         }
282     }
283 }
284 
285 // When noNan is true, fillRandom does not generate NaNs in float formats.
fillRandom(de::Random * randomGen,MultiPlaneImageData * imageData,const vk::VkFormat format,const bool noNan)286 void fillRandom(de::Random *randomGen, MultiPlaneImageData *imageData, const vk::VkFormat format, const bool noNan)
287 {
288     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
289     {
290         const size_t planeSize  = imageData->getPlaneSize(planeNdx);
291         uint8_t *const planePtr = (uint8_t *)imageData->getPlanePtr(planeNdx);
292 
293         if (noNan)
294         {
295             fillRandomNoNaN(randomGen, planePtr, (uint32_t)planeSize, format);
296         }
297         else
298         {
299             for (size_t ndx = 0; ndx < planeSize; ++ndx)
300             {
301                 planePtr[ndx] = randomGen->getUint8();
302             }
303         }
304     }
305 }
306 
fillGradient(MultiPlaneImageData * imageData,const tcu::Vec4 & minVal,const tcu::Vec4 & maxVal)307 void fillGradient(MultiPlaneImageData *imageData, const tcu::Vec4 &minVal, const tcu::Vec4 &maxVal)
308 {
309     const PlanarFormatDescription &formatInfo = imageData->getDescription();
310 
311     // \todo [pyry] Optimize: no point in re-rendering source gradient for each channel.
312 
313     for (uint32_t channelNdx = 0; channelNdx < 4; channelNdx++)
314     {
315         if (formatInfo.hasChannelNdx(channelNdx))
316         {
317             const tcu::PixelBufferAccess channelAccess = imageData->getChannelAccess(channelNdx);
318             tcu::TextureLevel tmpTexture(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT),
319                                          channelAccess.getWidth(), channelAccess.getHeight());
320             const tcu::ConstPixelBufferAccess tmpAccess = tmpTexture.getAccess();
321 
322             tcu::fillWithComponentGradients(tmpTexture, minVal, maxVal);
323 
324             for (int y = 0; y < channelAccess.getHeight(); ++y)
325                 for (int x = 0; x < channelAccess.getWidth(); ++x)
326                 {
327                     channelAccess.setPixel(tcu::Vec4(tmpAccess.getPixel(x, y)[channelNdx]), x, y);
328                 }
329         }
330     }
331 }
332 
fillZero(MultiPlaneImageData * imageData)333 void fillZero(MultiPlaneImageData *imageData)
334 {
335     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
336         deMemset(imageData->getPlanePtr(planeNdx), 0, imageData->getPlaneSize(planeNdx));
337 }
338 
allocateAndBindImageMemory(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,VkImage image,VkFormat format,VkImageCreateFlags createFlags,vk::MemoryRequirement requirement)339 vector<AllocationSp> allocateAndBindImageMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
340                                                 VkImage image, VkFormat format, VkImageCreateFlags createFlags,
341                                                 vk::MemoryRequirement requirement)
342 {
343     vector<AllocationSp> allocations;
344 
345     if ((createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0)
346     {
347         const uint32_t numPlanes = getPlaneCount(format);
348 
349         bindImagePlanesMemory(vkd, device, image, numPlanes, allocations, allocator, requirement);
350     }
351     else
352     {
353         const VkMemoryRequirements reqs = getImageMemoryRequirements(vkd, device, image);
354 
355         allocations.push_back(AllocationSp(allocator.allocate(reqs, requirement).release()));
356 
357         VK_CHECK(vkd.bindImageMemory(device, image, allocations.back()->getMemory(), allocations.back()->getOffset()));
358     }
359 
360     return allocations;
361 }
362 // Accept only NV12
uploadImage(const DeviceInterface & vkd,VkDevice device,uint32_t queueFamilyNdx,Allocator & allocator,VkImage image,const MultiPlaneImageData & imageData,VkAccessFlags nextAccess,VkImageLayout finalLayout,uint32_t arrayLayer)363 void uploadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
364                  VkImage image, const MultiPlaneImageData &imageData, VkAccessFlags nextAccess,
365                  VkImageLayout finalLayout, uint32_t arrayLayer)
366 {
367     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
368     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
369     const Unique<VkCommandBuffer> cmdBuffer(
370         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
371     vector<VkBufferSp> stagingBuffers;
372     vector<AllocationSp> stagingMemory;
373 
374     const PlanarFormatDescription &formatDesc = imageData.getDescription();
375 
376     allocateAndWriteStagingBuffers(vkd, device, allocator, imageData, &stagingBuffers, &stagingMemory);
377 
378     beginCommandBuffer(vkd, *cmdBuffer);
379 
380     for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
381     {
382         const VkImageAspectFlagBits aspect =
383             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
384         const VkExtent3D imageExtent = makeExtent3D(imageData.getSize().x(), imageData.getSize().y(), 1u);
385         const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
386         const VkBufferImageCopy copy = {0u, // bufferOffset
387                                         0u, // bufferRowLength
388                                         0u, // bufferImageHeight
389                                         {(VkImageAspectFlags)aspect, 0u, arrayLayer, 1u},
390                                         makeOffset3D(0u, 0u, 0u),
391                                         planeExtent};
392 
393         {
394             const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
395                                                          DE_NULL,
396                                                          (VkAccessFlags)0,
397                                                          VK_ACCESS_TRANSFER_WRITE_BIT,
398                                                          VK_IMAGE_LAYOUT_UNDEFINED,
399                                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
400                                                          VK_QUEUE_FAMILY_IGNORED,
401                                                          VK_QUEUE_FAMILY_IGNORED,
402                                                          image,
403                                                          {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};
404 
405             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
406                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
407                                    (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
408                                    &preCopyBarrier);
409         }
410 
411         vkd.cmdCopyBufferToImage(*cmdBuffer, **stagingBuffers[planeNdx], image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
412                                  1u, &copy);
413 
414         {
415             const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
416                                                           DE_NULL,
417                                                           VK_ACCESS_TRANSFER_WRITE_BIT,
418                                                           nextAccess,
419                                                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
420                                                           finalLayout,
421                                                           VK_QUEUE_FAMILY_IGNORED,
422                                                           VK_QUEUE_FAMILY_IGNORED,
423                                                           image,
424                                                           {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};
425 
426             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
427                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
428                                    (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
429                                    &postCopyBarrier);
430         }
431     }
432 
433     endCommandBuffer(vkd, *cmdBuffer);
434 
435     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
436 }
437 
fillImageMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,uint32_t queueFamilyNdx,vk::VkImage image,const std::vector<de::SharedPtr<vk::Allocation>> & allocations,const MultiPlaneImageData & imageData,vk::VkAccessFlags nextAccess,vk::VkImageLayout finalLayout,uint32_t arrayLayer)438 void fillImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
439                      const std::vector<de::SharedPtr<vk::Allocation>> &allocations,
440                      const MultiPlaneImageData &imageData, vk::VkAccessFlags nextAccess, vk::VkImageLayout finalLayout,
441                      uint32_t arrayLayer)
442 {
443     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
444     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
445     const Unique<VkCommandBuffer> cmdBuffer(
446         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
447     const PlanarFormatDescription &formatDesc = imageData.getDescription();
448 
449     for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
450     {
451         const VkImageAspectFlagBits aspect =
452             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
453         const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
454         const size_t planeSize                      = imageData.getPlaneSize(planeNdx);
455         const uint32_t planeH                = imageData.getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
456         const VkImageSubresource subresource = {
457             static_cast<vk::VkImageAspectFlags>(aspect),
458             0u,
459             arrayLayer,
460         };
461         VkSubresourceLayout layout;
462 
463         vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
464 
465         for (uint32_t row = 0; row < planeH; ++row)
466         {
467             const size_t rowSize     = planeSize / planeH;
468             void *const dstPtr       = ((uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
469             const void *const srcPtr = ((const uint8_t *)imageData.getPlanePtr(planeNdx)) + row * rowSize;
470 
471             deMemcpy(dstPtr, srcPtr, rowSize);
472         }
473         flushMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
474     }
475 
476     beginCommandBuffer(vkd, *cmdBuffer);
477 
478     {
479         const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
480                                                       DE_NULL,
481                                                       0u,
482                                                       nextAccess,
483                                                       VK_IMAGE_LAYOUT_PREINITIALIZED,
484                                                       finalLayout,
485                                                       VK_QUEUE_FAMILY_IGNORED,
486                                                       VK_QUEUE_FAMILY_IGNORED,
487                                                       image,
488                                                       {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, arrayLayer, 1u}};
489 
490         vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
491                                (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
492                                (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
493                                &postCopyBarrier);
494     }
495 
496     endCommandBuffer(vkd, *cmdBuffer);
497 
498     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
499 }
500 
downloadImage(const DeviceInterface & vkd,VkDevice device,uint32_t queueFamilyNdx,Allocator & allocator,VkImage image,MultiPlaneImageData * imageData,VkAccessFlags prevAccess,VkImageLayout initialLayout,uint32_t baseArrayLayer)501 void downloadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
502                    VkImage image, MultiPlaneImageData *imageData, VkAccessFlags prevAccess, VkImageLayout initialLayout,
503                    uint32_t baseArrayLayer)
504 {
505     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
506     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
507     const Unique<VkCommandBuffer> cmdBuffer(
508         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
509     vector<VkBufferSp> stagingBuffers;
510     vector<AllocationSp> stagingMemory;
511 
512     const PlanarFormatDescription &formatDesc = imageData->getDescription();
513 
514     allocateStagingBuffers(vkd, device, allocator, *imageData, &stagingBuffers, &stagingMemory);
515 
516     beginCommandBuffer(vkd, *cmdBuffer);
517 
518     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
519     {
520         const VkImageAspectFlagBits aspect =
521             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
522         {
523             const VkImageMemoryBarrier preCopyBarrier = {
524                 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
525                 DE_NULL,
526                 prevAccess,
527                 VK_ACCESS_TRANSFER_READ_BIT,
528                 initialLayout,
529                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
530                 VK_QUEUE_FAMILY_IGNORED,
531                 VK_QUEUE_FAMILY_IGNORED,
532                 image,
533                 {static_cast<vk::VkImageAspectFlags>(aspect), 0u, 1u, baseArrayLayer, 1u}};
534 
535             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
536                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
537                                    (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
538                                    &preCopyBarrier);
539         }
540         {
541             const VkExtent3D imageExtent = makeExtent3D(imageData->getSize().x(), imageData->getSize().y(), 1u);
542             const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
543             const VkBufferImageCopy copy = {0u, // bufferOffset
544                                             0u, // bufferRowLength
545                                             0u, // bufferImageHeight
546                                             {(VkImageAspectFlags)aspect, 0u, baseArrayLayer, 1u},
547                                             makeOffset3D(0u, 0u, 0u),
548                                             planeExtent};
549 
550             vkd.cmdCopyImageToBuffer(*cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
551                                      **stagingBuffers[planeNdx], 1u, &copy);
552         }
553         {
554             const VkBufferMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
555                                                            DE_NULL,
556                                                            VK_ACCESS_TRANSFER_WRITE_BIT,
557                                                            VK_ACCESS_HOST_READ_BIT,
558                                                            VK_QUEUE_FAMILY_IGNORED,
559                                                            VK_QUEUE_FAMILY_IGNORED,
560                                                            **stagingBuffers[planeNdx],
561                                                            0u,
562                                                            VK_WHOLE_SIZE};
563 
564             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
565                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u,
566                                    (const VkMemoryBarrier *)DE_NULL, 1u, &postCopyBarrier, 0u,
567                                    (const VkImageMemoryBarrier *)DE_NULL);
568         }
569     }
570 
571     endCommandBuffer(vkd, *cmdBuffer);
572 
573     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
574 
575     readStagingBuffers(imageData, vkd, device, stagingMemory);
576 }
577 
readImageMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,uint32_t queueFamilyNdx,vk::VkImage image,const std::vector<de::SharedPtr<vk::Allocation>> & allocations,MultiPlaneImageData * imageData,vk::VkAccessFlags prevAccess,vk::VkImageLayout initialLayout)578 void readImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
579                      const std::vector<de::SharedPtr<vk::Allocation>> &allocations, MultiPlaneImageData *imageData,
580                      vk::VkAccessFlags prevAccess, vk::VkImageLayout initialLayout)
581 {
582     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
583     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
584     const Unique<VkCommandBuffer> cmdBuffer(
585         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
586     const PlanarFormatDescription &formatDesc = imageData->getDescription();
587 
588     beginCommandBuffer(vkd, *cmdBuffer);
589 
590     {
591         const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
592                                                      DE_NULL,
593                                                      prevAccess,
594                                                      vk::VK_ACCESS_HOST_READ_BIT,
595                                                      initialLayout,
596                                                      VK_IMAGE_LAYOUT_GENERAL,
597                                                      VK_QUEUE_FAMILY_IGNORED,
598                                                      VK_QUEUE_FAMILY_IGNORED,
599                                                      image,
600                                                      {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
601 
602         vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
603                                (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u,
604                                (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
605                                &preCopyBarrier);
606     }
607 
608     endCommandBuffer(vkd, *cmdBuffer);
609 
610     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
611 
612     for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
613     {
614         const VkImageAspectFlagBits aspect =
615             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
616         const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
617         const size_t planeSize                      = imageData->getPlaneSize(planeNdx);
618         const uint32_t planeH                = imageData->getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
619         const VkImageSubresource subresource = {
620             static_cast<vk::VkImageAspectFlags>(aspect),
621             0u,
622             0u,
623         };
624         VkSubresourceLayout layout;
625 
626         vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
627 
628         invalidateMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
629 
630         for (uint32_t row = 0; row < planeH; ++row)
631         {
632             const size_t rowSize = planeSize / planeH;
633             const void *const srcPtr =
634                 ((const uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
635             void *const dstPtr = ((uint8_t *)imageData->getPlanePtr(planeNdx)) + row * rowSize;
636 
637             deMemcpy(dstPtr, srcPtr, rowSize);
638         }
639     }
640 }
641 
642 // ChannelAccess utilities
643 namespace
644 {
645 
//! Sign-extend a value that is \c bits wide to a 32-bit signed integer.
//!
//! When bit (bits - 1) of \c src is set, every bit from that position upwards is set in the
//! result; otherwise \c src is returned unchanged.
inline int32_t signExtend(uint32_t src, int bits)
{
    const uint32_t signBit  = 1u << (bits - 1);
    const uint32_t highBits = ~(signBit - 1u); // the sign bit and everything above it
    const bool isNegative   = (src & signBit) != 0u;

    return (int32_t)(isNegative ? (src | highBits) : src);
}
655 
//! Integer division of \c a by \c b, rounded towards positive infinity. \c b must be non-zero.
uint32_t divRoundUp(uint32_t a, uint32_t b)
{
    const uint32_t quotient = a / b;
    const bool hasRemainder = (a % b) != 0u;

    return hasRemainder ? quotient + 1u : quotient;
}
663 
664 // \todo Taken from tcuTexture.cpp
665 // \todo [2011-09-21 pyry] Move to tcutil?
666 template <typename T>
convertSatRte(float f)667 inline T convertSatRte(float f)
668 {
669     // \note Doesn't work for 64-bit types
670     DE_STATIC_ASSERT(sizeof(T) < sizeof(uint64_t));
671     DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));
672 
673     int64_t minVal = std::numeric_limits<T>::min();
674     int64_t maxVal = std::numeric_limits<T>::max();
675     float q        = deFloatFrac(f);
676     int64_t intVal = (int64_t)(f - q);
677 
678     // Rounding.
679     if (q == 0.5f)
680     {
681         if (intVal % 2 != 0)
682             intVal++;
683     }
684     else if (q > 0.5f)
685         intVal++;
686     // else Don't add anything
687 
688     // Saturate.
689     intVal = de::max(minVal, de::min(maxVal, intVal));
690 
691     return (T)intVal;
692 }
693 
694 } // namespace
695 
ChannelAccess(tcu::TextureChannelClass channelClass,uint8_t channelSize,const tcu::IVec3 & size,const tcu::IVec3 & bitPitch,void * data,uint32_t bitOffset)696 ChannelAccess::ChannelAccess(tcu::TextureChannelClass channelClass, uint8_t channelSize, const tcu::IVec3 &size,
697                              const tcu::IVec3 &bitPitch, void *data, uint32_t bitOffset)
698     : m_channelClass(channelClass)
699     , m_channelSize(channelSize)
700     , m_size(size)
701     , m_bitPitch(bitPitch)
702     , m_data((uint8_t *)data + (bitOffset / 8))
703     , m_bitOffset(bitOffset % 8)
704 {
705 }
706 
getChannelUint(const tcu::IVec3 & pos) const707 uint32_t ChannelAccess::getChannelUint(const tcu::IVec3 &pos) const
708 {
709     DE_ASSERT(pos[0] < m_size[0]);
710     DE_ASSERT(pos[1] < m_size[1]);
711     DE_ASSERT(pos[2] < m_size[2]);
712 
713     const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
714     const uint8_t *const firstByte = ((const uint8_t *)m_data) + (bitOffset / 8);
715     const uint32_t byteCount       = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
716     const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
717     const uint32_t offset = bitOffset % 8;
718     uint32_t bits         = 0u;
719 
720     deMemcpy(&bits, firstByte, byteCount);
721 
722     return (bits >> offset) & mask;
723 }
724 
setChannel(const tcu::IVec3 & pos,uint32_t x)725 void ChannelAccess::setChannel(const tcu::IVec3 &pos, uint32_t x)
726 {
727     DE_ASSERT(pos[0] < m_size[0]);
728     DE_ASSERT(pos[1] < m_size[1]);
729     DE_ASSERT(pos[2] < m_size[2]);
730 
731     const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
732     uint8_t *const firstByte = ((uint8_t *)m_data) + (bitOffset / 8);
733     const uint32_t byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
734     const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
735     const uint32_t offset = bitOffset % 8;
736 
737     const uint32_t bits = (x & mask) << offset;
738     uint32_t oldBits    = 0;
739 
740     deMemcpy(&oldBits, firstByte, byteCount);
741 
742     {
743         const uint32_t newBits = bits | (oldBits & (~(mask << offset)));
744 
745         deMemcpy(firstByte, &newBits, byteCount);
746     }
747 }
748 
getChannel(const tcu::IVec3 & pos) const749 float ChannelAccess::getChannel(const tcu::IVec3 &pos) const
750 {
751     const uint32_t bits(getChannelUint(pos));
752 
753     switch (m_channelClass)
754     {
755     case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
756         return (float)bits / (float)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));
757 
758     case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
759         return (float)bits;
760 
761     case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
762         return de::max(-1.0f, (float)signExtend(bits, m_channelSize) / (float)((0x1u << (m_channelSize - 1u)) - 1u));
763 
764     case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
765         return (float)signExtend(bits, m_channelSize);
766 
767     case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
768         if (m_channelSize == 32)
769             return tcu::Float32(bits).asFloat();
770         else
771         {
772             DE_FATAL("Float type not supported");
773             return -1.0f;
774         }
775 
776     default:
777         DE_FATAL("Unknown texture channel class");
778         return -1.0f;
779     }
780 }
781 
//! Read the channel at \c pos and convert it to an interval, rounding every intermediate result
//! outwards with \c conversionFormat.
//!
//! Mirrors the float overload: fixed-point classes are normalised (both bounds of the signed
//! result are limited to -1.0 from below), integer classes are returned as their numeric value,
//! and floating point is supported for 32-bit channels only. Unsupported combinations hit
//! DE_FATAL and return a default-constructed interval.
tcu::Interval ChannelAccess::getChannel(const tcu::FloatFormat &conversionFormat, const tcu::IVec3 &pos) const
{
    const uint32_t rawBits(getChannelUint(pos));

    if (m_channelClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
    {
        const uint32_t maxCode          = (m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));
        const tcu::Interval numerator   = conversionFormat.roundOut((double)rawBits, false);
        const tcu::Interval denominator = conversionFormat.roundOut((double)maxCode, false);

        return conversionFormat.roundOut(numerator / denominator, false);
    }

    if (m_channelClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER)
        return conversionFormat.roundOut((double)rawBits, false);

    if (m_channelClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
    {
        const uint32_t maxMagnitude     = (0x1u << (m_channelSize - 1u)) - 1u;
        const tcu::Interval numerator   = conversionFormat.roundOut((double)signExtend(rawBits, m_channelSize), false);
        const tcu::Interval denominator = conversionFormat.roundOut((double)maxMagnitude, false);
        const tcu::Interval normalized(conversionFormat.roundOut(numerator / denominator, false));

        return tcu::Interval(de::max(-1.0, normalized.lo()), de::max(-1.0, normalized.hi()));
    }

    if (m_channelClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER)
        return conversionFormat.roundOut((double)signExtend(rawBits, m_channelSize), false);

    if (m_channelClass == tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
    {
        if (m_channelSize == 32)
            return conversionFormat.roundOut(tcu::Float32(rawBits).asFloat(), false);

        DE_FATAL("Float type not supported");
        return tcu::Interval();
    }

    DE_FATAL("Unknown texture channel class");
    return tcu::Interval();
}
825 
setChannel(const tcu::IVec3 & pos,float x)826 void ChannelAccess::setChannel(const tcu::IVec3 &pos, float x)
827 {
828     DE_ASSERT(pos[0] < m_size[0]);
829     DE_ASSERT(pos[1] < m_size[1]);
830     DE_ASSERT(pos[2] < m_size[2]);
831 
832     const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
833 
834     switch (m_channelClass)
835     {
836     case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
837     {
838         const uint32_t maxValue(mask);
839         const uint32_t value(de::min(maxValue, (uint32_t)convertSatRte<uint32_t>(x * (float)maxValue)));
840         setChannel(pos, value);
841         break;
842     }
843 
844     case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
845     {
846         const int32_t range((0x1u << (m_channelSize - 1u)) - 1u);
847         const uint32_t value((uint32_t)de::clamp<int32_t>(convertSatRte<int32_t>(x * (float)range), -range, range));
848         setChannel(pos, value);
849         break;
850     }
851 
852     case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
853     {
854         const uint32_t maxValue(mask);
855         const uint32_t value(de::min(maxValue, (uint32_t)x));
856         setChannel(pos, value);
857         break;
858     }
859 
860     case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
861     {
862         const int32_t minValue(-(int32_t)(1u << (m_channelSize - 1u)));
863         const int32_t maxValue((int32_t)((1u << (m_channelSize - 1u)) - 1u));
864         const uint32_t value((uint32_t)de::clamp((int32_t)x, minValue, maxValue));
865         setChannel(pos, value);
866         break;
867     }
868 
869     case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
870     {
871         if (m_channelSize == 32)
872         {
873             const uint32_t value = tcu::Float32(x).bits();
874             setChannel(pos, value);
875         }
876         else
877             DE_FATAL("Float type not supported");
878         break;
879     }
880 
881     default:
882         DE_FATAL("Unknown texture channel class");
883     }
884 }
885 
getChannelAccess(MultiPlaneImageData & data,const vk::PlanarFormatDescription & formatInfo,const UVec2 & size,int channelNdx)886 ChannelAccess getChannelAccess(MultiPlaneImageData &data, const vk::PlanarFormatDescription &formatInfo,
887                                const UVec2 &size, int channelNdx)
888 {
889     DE_ASSERT(formatInfo.hasChannelNdx(channelNdx));
890 
891     const uint32_t planeNdx         = formatInfo.channels[channelNdx].planeNdx;
892     const uint32_t valueOffsetBits  = formatInfo.channels[channelNdx].offsetBits;
893     const uint32_t pixelStrideBytes = formatInfo.channels[channelNdx].strideBytes;
894     const uint32_t pixelStrideBits  = pixelStrideBytes * 8;
895     const uint8_t sizeBits          = formatInfo.channels[channelNdx].sizeBits;
896 
897     DE_ASSERT(size.x() % (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor) == 0);
898     DE_ASSERT(size.y() % (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor) == 0);
899 
900     uint32_t accessWidth            = size.x() / (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor);
901     const uint32_t accessHeight     = size.y() / (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor);
902     const uint32_t elementSizeBytes = formatInfo.planes[planeNdx].elementSizeBytes;
903     const uint32_t rowPitch         = formatInfo.planes[planeNdx].elementSizeBytes * accessWidth;
904     const uint32_t rowPitchBits     = rowPitch * 8;
905 
906     if (pixelStrideBytes != elementSizeBytes)
907     {
908         DE_ASSERT(elementSizeBytes % pixelStrideBytes == 0);
909         accessWidth *= elementSizeBytes / pixelStrideBytes;
910     }
911 
912     return ChannelAccess((tcu::TextureChannelClass)formatInfo.channels[channelNdx].type, sizeBits,
913                          tcu::IVec3(accessWidth, accessHeight, 1u),
914                          tcu::IVec3((int)pixelStrideBits, (int)rowPitchBits, 0), data.getPlanePtr(planeNdx),
915                          (uint32_t)valueOffsetBits);
916 }
917 
isXChromaSubsampled(vk::VkFormat format)918 bool isXChromaSubsampled(vk::VkFormat format)
919 {
920     switch (format)
921     {
922     case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
923     case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
924     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
925     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
926     case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
927     case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
928     case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
929     case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
930     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
931     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
932     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
933     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
934     case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
935     case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
936     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
937     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
938     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
939     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
940     case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
941     case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
942     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
943     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
944     case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
945     case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
946         return true;
947 
948     default:
949         return false;
950     }
951 }
952 
isYChromaSubsampled(vk::VkFormat format)953 bool isYChromaSubsampled(vk::VkFormat format)
954 {
955     switch (format)
956     {
957     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
958     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
959     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
960     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
961     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
962     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
963     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
964     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
965         return true;
966 
967     default:
968         return false;
969     }
970 }
971 
areLsb6BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)972 bool areLsb6BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
973 {
974     if ((srcFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) ||
975         (srcFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
976         (dstFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
977         (srcFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
978         (dstFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
979         (srcFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
980         (dstFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
981         (srcFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
982         (dstFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
983         (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
984         (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
985         (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
986         (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
987         (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
988         (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
989         (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
990         (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
991         (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16) ||
992         (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16))
993     {
994         return true;
995     }
996 
997     return false;
998 }
999 
areLsb4BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1000 bool areLsb4BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1001 {
1002     if ((srcFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) ||
1003         (srcFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1004         (dstFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1005         (srcFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1006         (dstFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1007         (srcFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1008         (dstFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1009         (srcFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1010         (dstFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1011         (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1012         (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1013         (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1014         (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1015         (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1016         (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1017         (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1018         (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1019         (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16) ||
1020         (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16))
1021     {
1022         return true;
1023     }
1024 
1025     return false;
1026 }
1027 
1028 // \note Used for range expansion
getYCbCrBitDepth(vk::VkFormat format)1029 tcu::UVec4 getYCbCrBitDepth(vk::VkFormat format)
1030 {
1031     switch (format)
1032     {
1033     case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1034     case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1035     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1036     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1037     case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1038     case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1039     case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1040     case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1041         return tcu::UVec4(8, 8, 8, 0);
1042 
1043     case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1044         return tcu::UVec4(10, 0, 0, 0);
1045 
1046     case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1047         return tcu::UVec4(10, 10, 0, 0);
1048 
1049     case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1050         return tcu::UVec4(10, 10, 10, 10);
1051 
1052     case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1053     case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1054     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1055     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1056     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1057     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1058     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1059     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1060         return tcu::UVec4(10, 10, 10, 0);
1061 
1062     case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1063         return tcu::UVec4(12, 0, 0, 0);
1064 
1065     case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1066         return tcu::UVec4(12, 12, 0, 0);
1067 
1068     case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1069     case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1070     case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1071     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1072     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1073     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1074     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1075     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1076     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1077         return tcu::UVec4(12, 12, 12, 12);
1078 
1079     case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1080     case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1081     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1082     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1083     case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1084     case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1085     case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1086     case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1087         return tcu::UVec4(16, 16, 16, 0);
1088 
1089     default:
1090         return tcu::getTextureFormatBitDepth(vk::mapVkFormat(format)).cast<uint32_t>();
1091     }
1092 }
1093 
getPrecision(VkFormat format)1094 std::vector<tcu::FloatFormat> getPrecision(VkFormat format)
1095 {
1096     std::vector<FloatFormat> floatFormats;
1097     UVec4 channelDepth = getYCbCrBitDepth(format);
1098 
1099     for (uint32_t channelIdx = 0; channelIdx < 4; channelIdx++)
1100         floatFormats.push_back(tcu::FloatFormat(0, 0, channelDepth[channelIdx], false, tcu::YES));
1101 
1102     return floatFormats;
1103 }
1104 
getYCbCrFormatChannelCount(vk::VkFormat format)1105 uint32_t getYCbCrFormatChannelCount(vk::VkFormat format)
1106 {
1107     switch (format)
1108     {
1109     case vk::VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1110     case vk::VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1111     case vk::VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1112     case vk::VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1113     case vk::VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1114     case vk::VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1115     case vk::VK_FORMAT_B8G8R8A8_UNORM:
1116     case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1117     case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1118     case vk::VK_FORMAT_R16G16B16A16_UNORM:
1119     case vk::VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1120     case vk::VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1121     case vk::VK_FORMAT_R8G8B8A8_UNORM:
1122         return 4;
1123 
1124     case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1125     case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1126     case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1127     case vk::VK_FORMAT_B5G6R5_UNORM_PACK16:
1128     case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1129     case vk::VK_FORMAT_B8G8R8_UNORM:
1130     case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1131     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1132     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1133     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1134     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1135     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1136     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1137     case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1138     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1139     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1140     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1141     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1142     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1143     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1144     case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1145     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1146     case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1147     case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1148     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1149     case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1150     case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1151     case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1152     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1153     case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1154     case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1155     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1156     case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1157     case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1158     case vk::VK_FORMAT_R16G16B16_UNORM:
1159     case vk::VK_FORMAT_R5G6B5_UNORM_PACK16:
1160     case vk::VK_FORMAT_R8G8B8_UNORM:
1161         return 3;
1162 
1163     case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1164     case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1165         return 2;
1166 
1167     case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1168     case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1169         return 1;
1170 
1171     default:
1172         DE_FATAL("Unknown number of channels");
1173         return -1;
1174     }
1175 }
1176 
1177 // YCbCr color conversion utilities
1178 namespace
1179 {
1180 
rangeExpandChroma(vk::VkSamplerYcbcrRange range,const tcu::FloatFormat & conversionFormat,const uint32_t bits,const tcu::Interval & sample)1181 tcu::Interval rangeExpandChroma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
1182                                 const uint32_t bits, const tcu::Interval &sample)
1183 {
1184     const uint32_t values(0x1u << bits);
1185 
1186     switch (range)
1187     {
1188     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1189         return conversionFormat.roundOut(
1190             sample - conversionFormat.roundOut(
1191                          tcu::Interval((double)(0x1u << (bits - 1u)) / (double)((0x1u << bits) - 1u)), false),
1192             false);
1193 
1194     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1195     {
1196         const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1197         const tcu::Interval dividend(
1198             conversionFormat.roundOut(a - tcu::Interval((double)(128u * (0x1u << (bits - 8u)))), false));
1199         const tcu::Interval divisor((double)(224u * (0x1u << (bits - 8u))));
1200         const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));
1201 
1202         return result;
1203     }
1204 
1205     default:
1206         DE_FATAL("Unknown YCbCrRange");
1207         return tcu::Interval();
1208     }
1209 }
1210 
rangeExpandLuma(vk::VkSamplerYcbcrRange range,const tcu::FloatFormat & conversionFormat,const uint32_t bits,const tcu::Interval & sample)1211 tcu::Interval rangeExpandLuma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
1212                               const uint32_t bits, const tcu::Interval &sample)
1213 {
1214     const uint32_t values(0x1u << bits);
1215 
1216     switch (range)
1217     {
1218     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1219         return conversionFormat.roundOut(sample, false);
1220 
1221     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1222     {
1223         const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1224         const tcu::Interval dividend(
1225             conversionFormat.roundOut(a - tcu::Interval((double)(16u * (0x1u << (bits - 8u)))), false));
1226         const tcu::Interval divisor((double)(219u * (0x1u << (bits - 8u))));
1227         const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));
1228 
1229         return result;
1230     }
1231 
1232     default:
1233         DE_FATAL("Unknown YCbCrRange");
1234         return tcu::Interval();
1235     }
1236 }
1237 
//! Extend \c x so that it also contains the clamp limits it crosses.
//!
//! If \c x reaches below \c min the result is x | Interval(min); if it reaches above \c max the
//! result additionally includes Interval(max). The original bounds of \c x are kept, so the
//! result covers both the unclamped and the clamped value.
tcu::Interval clampMaybe(const tcu::Interval &x, double min, double max)
{
    DE_ASSERT(min <= max);

    const bool crossesMin = x.lo() < min;
    const bool crossesMax = x.hi() > max;
    tcu::Interval widened = x;

    if (crossesMin)
        widened = widened | tcu::Interval(min);

    if (crossesMax)
        widened = widened | tcu::Interval(max);

    return widened;
}
1252 
convertColor(vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,const vector<tcu::FloatFormat> & conversionFormat,const tcu::UVec4 & bitDepth,const tcu::Interval input[4],tcu::Interval output[4])1253 void convertColor(vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
1254                   const vector<tcu::FloatFormat> &conversionFormat, const tcu::UVec4 &bitDepth,
1255                   const tcu::Interval input[4], tcu::Interval output[4])
1256 {
1257     switch (colorModel)
1258     {
1259     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
1260     {
1261         for (size_t ndx = 0; ndx < 4; ndx++)
1262             output[ndx] = input[ndx];
1263         break;
1264     }
1265 
1266     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
1267     {
1268         output[0] = clampMaybe(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]), -0.5, 0.5);
1269         output[1] = clampMaybe(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]), 0.0, 1.0);
1270         output[2] = clampMaybe(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]), -0.5, 0.5);
1271         output[3] = input[3];
1272         break;
1273     }
1274 
1275     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
1276     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
1277     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
1278     {
1279         const tcu::Interval y(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]));
1280         const tcu::Interval cr(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]));
1281         const tcu::Interval cb(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]));
1282 
1283         const tcu::Interval yClamped(clampMaybe(y, 0.0, 1.0));
1284         const tcu::Interval crClamped(clampMaybe(cr, -0.5, 0.5));
1285         const tcu::Interval cbClamped(clampMaybe(cb, -0.5, 0.5));
1286 
1287         if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601)
1288         {
1289             output[0] =
1290                 conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.402 * crClamped, false), false);
1291             output[1] = conversionFormat[1].roundOut(
1292                 conversionFormat[1].roundOut(
1293                     yClamped - conversionFormat[1].roundOut((0.202008 / 0.587) * cbClamped, false), false) -
1294                     conversionFormat[1].roundOut((0.419198 / 0.587) * crClamped, false),
1295                 false);
1296             output[2] =
1297                 conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.772 * cbClamped, false), false);
1298         }
1299         else if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709)
1300         {
1301             output[0] =
1302                 conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.5748 * crClamped, false), false);
1303             output[1] = conversionFormat[1].roundOut(
1304                 conversionFormat[1].roundOut(
1305                     yClamped - conversionFormat[1].roundOut((0.13397432 / 0.7152) * cbClamped, false), false) -
1306                     conversionFormat[1].roundOut((0.33480248 / 0.7152) * crClamped, false),
1307                 false);
1308             output[2] =
1309                 conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8556 * cbClamped, false), false);
1310         }
1311         else
1312         {
1313             output[0] =
1314                 conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.4746 * crClamped, false), false);
1315             output[1] = conversionFormat[1].roundOut(
1316                 conversionFormat[1].roundOut(
1317                     yClamped - conversionFormat[1].roundOut(
1318                                    conversionFormat[1].roundOut(0.11156702 / 0.6780, false) * cbClamped, false),
1319                     false) -
1320                     conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.38737742 / 0.6780, false) * crClamped,
1321                                                  false),
1322                 false);
1323             output[2] =
1324                 conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8814 * cbClamped, false), false);
1325         }
1326         output[3] = input[3];
1327         break;
1328     }
1329 
1330     default:
1331         DE_FATAL("Unknown YCbCrModel");
1332     }
1333 
1334     if (colorModel != vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
1335     {
1336         for (int ndx = 0; ndx < 3; ndx++)
1337             output[ndx] = clampMaybe(output[ndx], 0.0, 1.0);
1338     }
1339 }
1340 
// Reflects a negative coordinate onto the non-negative side (-1 -> 0, -2 -> 1, ...).
// Non-negative coordinates are returned unchanged.
int mirror(int coord)
{
    if (coord < 0)
        return -(coord + 1);

    return coord;
}
1345 
// Remainder of a / b where a negative remainder has b added to it, so that for a
// positive b the result always lies in [0, b).
int imod(int a, int b)
{
    const int remainder = a % b;

    if (remainder < 0)
        return remainder + b;

    return remainder;
}
1351 
// Fractional part of an interval.
//
// An interval that is at least one unit wide yields the full [0, 1] range; otherwise the
// result is built from the fractional parts of the two end points.
//
// NOTE(review): when x crosses an integer, deFrac(x.hi()) is smaller than deFrac(x.lo());
// the result then depends on how tcu::Interval orders its two arguments - confirm that
// this is the intended range for such inputs.
tcu::Interval frac(const tcu::Interval &x)
{
    const double width = x.hi() - x.lo();

    if (width >= 1.0)
        return tcu::Interval(0.0, 1.0);

    return tcu::Interval(deFrac(x.lo()), deFrac(x.hi()));
}
1363 
// Multiplies the coordinate interval st by size. The input and the product are each
// rounded outwards to coordFormat precision.
tcu::Interval calculateUV(const tcu::FloatFormat &coordFormat, const tcu::Interval &st, const int size)
{
    const tcu::Interval roundedSt(coordFormat.roundOut(st, false));
    const tcu::Interval extent((double)size);

    return coordFormat.roundOut(roundedSt * extent, false);
}
1368 
// Range of integer texel coordinates that nearest sampling may select for uv.
//
// uv is rounded out, shifted by -0.5 and rounded out again; each end of the result is then
// pushed outwards by coordFormat.ulp(end, 1) before being rounded to an integer.
// Returns (lowest, highest).
tcu::IVec2 calculateNearestIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
{
    const tcu::Interval roundedUV(coordFormat.roundOut(uv, false));
    const tcu::Interval ij(coordFormat.roundOut(roundedUV - tcu::Interval(0.5), false));

    const double lowest  = ij.lo() - coordFormat.ulp(ij.lo(), 1);
    const double highest = ij.hi() + coordFormat.ulp(ij.hi(), 1);

    return tcu::IVec2(deRoundToInt32(lowest), deRoundToInt32(highest));
}
1376 
1377 // Calculate range of pixel coordinates that can be used as lower coordinate for linear sampling
calculateLinearIJRange(const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1378 tcu::IVec2 calculateLinearIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
1379 {
1380     const tcu::Interval ij(coordFormat.roundOut(uv - tcu::Interval(0.5), false));
1381 
1382     return tcu::IVec2(deFloorToInt32(ij.lo()), deFloorToInt32(ij.hi()));
1383 }
1384 
calculateIJRange(vk::VkFilter filter,const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1385 tcu::IVec2 calculateIJRange(vk::VkFilter filter, const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
1386 {
1387     DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1388     return (filter == vk::VK_FILTER_LINEAR) ? calculateLinearIJRange(coordFormat, uv) :
1389                                               calculateNearestIJRange(coordFormat, uv);
1390 }
1391 
// Range of linear-filter weights for texel ij given the coordinate interval uv.
//
// The part of (uv - 0.5) that falls inside [ij, ij + 1] is reduced to its fractional part,
// then snapped outwards to a grid of 2^subTexelPrecisionBits steps and clamped to [0, 1].
// subTexelPrecisionBits must be below 32 for the shift to be well defined.
tcu::Interval calculateAB(const uint32_t subTexelPrecisionBits, const tcu::Interval &uv, int ij)
{
    const uint32_t gridSteps = 0x1u << subTexelPrecisionBits;

    const tcu::Interval texelSpan((double)ij, (double)(ij + 1));
    const tcu::Interval ab(frac((uv - 0.5) & texelSpan));
    const tcu::Interval gridAB(ab * tcu::Interval(gridSteps));

    // Lower end snaps down, upper end snaps up, so the grid rounding never shrinks the range.
    const double lowest  = de::max(deFloor(gridAB.lo()) / gridSteps, 0.0);
    const double highest = de::min(deCeil(gridAB.hi()) / gridSteps, 1.0);

    return tcu::Interval(lowest, highest);
}
1402 
// Reads one channel value from access at coord, after applying addressModeU / addressModeV
// to the x / y coordinate respectively.
tcu::Interval lookupWrapped(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                            vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
                            const tcu::IVec2 &coord)
{
    const int wrappedX = wrap(addressModeU, coord.x(), access.getSize().x());
    const int wrappedY = wrap(addressModeV, coord.y(), access.getSize().y());

    tcu::Interval value = access.getChannel(conversionFormat, tcu::IVec3(wrappedX, wrappedY, 0));

    // Expand range for 10-bit conversions to +/-1.0 ULP.
    // The two steps are sequential on purpose: the upper widening uses the length of the
    // interval as already widened at its lower end.
    if (conversionFormat.getFractionBits() == 10)
    {
        value |= value.lo() - value.length() / 2.0;
        value |= value.hi() + value.length() / 2.0;
    }

    return value;
}
1420 
// Bilinear blend of four texel values with every intermediate result rounded outwards to
// filteringFormat precision.
//
// a weights the texels in the second column (p10, p11) and b weights the texels in the
// second row (p01, p11); the first column / row get (1 - a) / (1 - b).
// Terms are accumulated in the order p00, p10, p01, p11.
tcu::Interval linearInterpolate(const tcu::FloatFormat &filteringFormat, const tcu::Interval &a, const tcu::Interval &b,
                                const tcu::Interval &p00, const tcu::Interval &p10, const tcu::Interval &p01,
                                const tcu::Interval &p11)
{
    const tcu::Interval texels[2][2] = {{p00, p10}, {p01, p11}};
    const tcu::Interval columnWeights[2] = {filteringFormat.roundOut(1.0 - a, false),
                                            filteringFormat.roundOut(a, false)};
    const tcu::Interval rowWeights[2]    = {filteringFormat.roundOut(1.0 - b, false),
                                            filteringFormat.roundOut(b, false)};

    tcu::Interval sum(0.0);

    for (int row = 0; row < 2; row++)
    {
        for (int column = 0; column < 2; column++)
        {
            const tcu::Interval weight(filteringFormat.roundOut(columnWeights[column] * rowWeights[row], false));
            const tcu::Interval term(filteringFormat.roundOut(texels[row][column] * weight, false));

            sum = filteringFormat.roundOut(sum + term, false);
        }
    }

    return sum;
}
1439 
// Converts the coordinate interval uv into chroma-plane coordinates for implicit
// reconstruction: uv is halved, and for VK_CHROMA_LOCATION_COSITED_EVEN it is first
// shifted by +0.5. Every step is rounded outwards to coordFormat precision.
tcu::Interval calculateImplicitChromaUV(const tcu::FloatFormat &coordFormat, vk::VkChromaLocation offset,
                                        const tcu::Interval &uv)
{
    const bool cositedEven = (offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN);
    const tcu::Interval source(cositedEven ? coordFormat.roundOut(uv + 0.5, false) : uv);

    return coordFormat.roundOut(0.5 * source, false);
}
1448 
// Linearly filters the 2x2 texel footprint whose lower corner is coord.
// Texels are fetched through lookupWrapped (so the address modes apply) and blended by
// linearInterpolate with weights a (x direction) and b (y direction).
tcu::Interval linearSample(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                           const tcu::FloatFormat &filteringFormat, vk::VkSamplerAddressMode addressModeU,
                           vk::VkSamplerAddressMode addressModeV, const tcu::IVec2 &coord, const tcu::Interval &a,
                           const tcu::Interval &b)
{
    const tcu::Interval texel00(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 0)));
    const tcu::Interval texel10(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 0)));
    const tcu::Interval texel01(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 1)));
    const tcu::Interval texel11(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 1)));

    return linearInterpolate(filteringFormat, a, b, texel00, texel10, texel01, texel11);
}
1461 
// Explicit linear chroma reconstruction for planes subsampled in x only.
//
// Returns the chroma value at luma position (i, j) as a blend of the two horizontally
// neighbouring chroma texels subI and subI + 1 on row j. Products and the final sum are
// rounded outwards to filteringFormat precision.
//
// Weights follow the Vulkan chroma reconstruction rules:
//   COSITED_EVEN, i even : texel(i/2)
//   COSITED_EVEN, i odd  : 0.5  * texel(floor(i/2))     + 0.5  * texel(floor(i/2) + 1)
//   MIDPOINT,     i even : 0.25 * texel(i/2 - 1)        + 0.75 * texel(i/2)
//   MIDPOINT,     i odd  : 0.75 * texel(floor(i/2))     + 0.25 * texel(floor(i/2) + 1)
tcu::Interval reconstructLinearXChromaSample(const tcu::FloatFormat &filteringFormat,
                                             const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation offset,
                                             vk::VkSamplerAddressMode addressModeU,
                                             vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                             int j)
{
    const bool cositedEven = (offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN);
    const bool evenColumn  = (i % 2 == 0);

    // Left-hand chroma texel of the blended pair.
    const int subI = (!cositedEven && evenColumn) ? divFloor(i, 2) - 1 : divFloor(i, 2);

    // Weight of the left-hand texel (subI); texel subI + 1 receives (1 - a).
    // An even column is co-sited with chroma texel i/2, so that texel gets the full weight.
    const double a = cositedEven ? (evenColumn ? 1.0 : 0.5) : (evenColumn ? 0.25 : 0.75);

    const tcu::Interval A(filteringFormat.roundOut(
        a * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI, j)), false));
    const tcu::Interval B(filteringFormat.roundOut(
        (1.0 - a) * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI + 1, j)),
        false));
    return filteringFormat.roundOut(A + B, false);
}
1480 
// Explicit linear chroma reconstruction for planes subsampled in both x and y.
//
// Returns the chroma value at luma position (i, j) by linearly sampling the 2x2 chroma
// footprint whose lower corner is (subI, subJ).
//
// a and b are handed to linearSample, where they are the weights of the texels at
// subI + 1 and subJ + 1 (the lower texels get 1 - a and 1 - b). Per axis:
//   COSITED_EVEN, even : lower = n/2,        upper weight 0.0
//   COSITED_EVEN, odd  : lower = floor(n/2), upper weight 0.5
//   MIDPOINT,     even : lower = n/2 - 1,    upper weight 0.75
//   MIDPOINT,     odd  : lower = floor(n/2), upper weight 0.25
// (For MIDPOINT a chroma texel sits half way between an even luma sample and the next odd
// one, so an even luma sample is closest to the upper texel of its pair.)
tcu::Interval reconstructLinearXYChromaSample(const tcu::FloatFormat &filteringFormat,
                                              const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation xOffset,
                                              vk::VkChromaLocation yOffset, vk::VkSamplerAddressMode addressModeU,
                                              vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                              int j)
{
    const bool xCosited = (xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN);
    const bool yCosited = (yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN);
    const bool evenI    = (i % 2 == 0);
    const bool evenJ    = (j % 2 == 0);

    // Lower corner of the chroma footprint.
    const int subI = (!xCosited && evenI) ? divFloor(i, 2) - 1 : divFloor(i, 2);
    const int subJ = (!yCosited && evenJ) ? divFloor(j, 2) - 1 : divFloor(j, 2);

    // Weights of the upper texels (subI + 1 / subJ + 1).
    const double a = xCosited ? (evenI ? 0.0 : 0.5) : (evenI ? 0.75 : 0.25);
    const double b = yCosited ? (evenJ ? 0.0 : 0.5) : (evenJ ? 0.75 : 0.25);

    return linearSample(access, conversionFormat, filteringFormat, addressModeU, addressModeV, tcu::IVec2(subI, subJ),
                        a, b);
}
swizzle(vk::VkComponentSwizzle swizzle,const ChannelAccess & identityPlane,const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane)1503 const ChannelAccess &swizzle(vk::VkComponentSwizzle swizzle, const ChannelAccess &identityPlane,
1504                              const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1505                              const ChannelAccess &aPlane)
1506 {
1507     switch (swizzle)
1508     {
1509     case vk::VK_COMPONENT_SWIZZLE_IDENTITY:
1510         return identityPlane;
1511     case vk::VK_COMPONENT_SWIZZLE_R:
1512         return rPlane;
1513     case vk::VK_COMPONENT_SWIZZLE_G:
1514         return gPlane;
1515     case vk::VK_COMPONENT_SWIZZLE_B:
1516         return bPlane;
1517     case vk::VK_COMPONENT_SWIZZLE_A:
1518         return aPlane;
1519 
1520     default:
1521         DE_FATAL("Unsupported swizzle");
1522         return identityPlane;
1523     }
1524 }
1525 
1526 } // namespace
1527 
wrap(vk::VkSamplerAddressMode addressMode,int coord,int size)1528 int wrap(vk::VkSamplerAddressMode addressMode, int coord, int size)
1529 {
1530     switch (addressMode)
1531     {
1532     case vk::VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1533         return (size - 1) - mirror(imod(coord, 2 * size) - size);
1534 
1535     case vk::VK_SAMPLER_ADDRESS_MODE_REPEAT:
1536         return imod(coord, size);
1537 
1538     case vk::VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1539         return de::clamp(coord, 0, size - 1);
1540 
1541     case vk::VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1542         return de::clamp(mirror(coord), 0, size - 1);
1543 
1544     default:
1545         DE_FATAL("Unknown wrap mode");
1546         return ~0;
1547     }
1548 }
1549 
// Integer division rounded towards negative infinity (floor(a / b)).
//
// The built-in '/' truncates towards zero, so the quotient is one too large whenever the
// exact result is negative and not an integer. That is the case exactly when the remainder
// is non-zero and its sign differs from the sign of the divisor. Works for positive and
// negative divisors; b must not be zero.
int divFloor(int a, int b)
{
    const int quotient  = a / b;
    const int remainder = a % b;

    const bool roundedUp = (remainder != 0) && ((remainder < 0) != (b < 0));

    return roundedUp ? quotient - 1 : quotient;
}
1559 
calculateBounds(const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane,const UVec4 & bitDepth,const vector<Vec2> & sts,const vector<FloatFormat> & filteringFormat,const vector<FloatFormat> & conversionFormat,const uint32_t subTexelPrecisionBits,vk::VkFilter filter,vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,vk::VkFilter chromaFilter,vk::VkChromaLocation xChromaOffset,vk::VkChromaLocation yChromaOffset,const vk::VkComponentMapping & componentMapping,bool explicitReconstruction,vk::VkSamplerAddressMode addressModeU,vk::VkSamplerAddressMode addressModeV,std::vector<Vec4> & minBounds,std::vector<Vec4> & maxBounds,std::vector<Vec4> & uvBounds,std::vector<IVec4> & ijBounds)1560 void calculateBounds(const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1561                      const ChannelAccess &aPlane, const UVec4 &bitDepth, const vector<Vec2> &sts,
1562                      const vector<FloatFormat> &filteringFormat, const vector<FloatFormat> &conversionFormat,
1563                      const uint32_t subTexelPrecisionBits, vk::VkFilter filter,
1564                      vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
1565                      vk::VkFilter chromaFilter, vk::VkChromaLocation xChromaOffset, vk::VkChromaLocation yChromaOffset,
1566                      const vk::VkComponentMapping &componentMapping, bool explicitReconstruction,
1567                      vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
1568                      std::vector<Vec4> &minBounds, std::vector<Vec4> &maxBounds, std::vector<Vec4> &uvBounds,
1569                      std::vector<IVec4> &ijBounds)
1570 {
1571     const FloatFormat highp(-126, 127, 23, true,
1572                             tcu::MAYBE,  // subnormals
1573                             tcu::YES,    // infinities
1574                             tcu::MAYBE); // NaN
1575     const FloatFormat coordFormat(-32, 32, 16, true);
1576     const ChannelAccess &rAccess(swizzle(componentMapping.r, rPlane, rPlane, gPlane, bPlane, aPlane));
1577     const ChannelAccess &gAccess(swizzle(componentMapping.g, gPlane, rPlane, gPlane, bPlane, aPlane));
1578     const ChannelAccess &bAccess(swizzle(componentMapping.b, bPlane, rPlane, gPlane, bPlane, aPlane));
1579     const ChannelAccess &aAccess(swizzle(componentMapping.a, aPlane, rPlane, gPlane, bPlane, aPlane));
1580 
1581     const bool subsampledX = gAccess.getSize().x() > rAccess.getSize().x();
1582     const bool subsampledY = gAccess.getSize().y() > rAccess.getSize().y();
1583 
1584     minBounds.resize(sts.size(), Vec4(TCU_INFINITY));
1585     maxBounds.resize(sts.size(), Vec4(-TCU_INFINITY));
1586 
1587     uvBounds.resize(sts.size(), Vec4(TCU_INFINITY, -TCU_INFINITY, TCU_INFINITY, -TCU_INFINITY));
1588     ijBounds.resize(sts.size(), IVec4(0x7FFFFFFF, -1 - 0x7FFFFFFF, 0x7FFFFFFF, -1 - 0x7FFFFFFF));
1589 
1590     // Chroma plane sizes must match
1591     DE_ASSERT(rAccess.getSize() == bAccess.getSize());
1592 
1593     // Luma plane sizes must match
1594     DE_ASSERT(gAccess.getSize() == aAccess.getSize());
1595 
1596     // Luma plane size must match chroma plane or be twice as big
1597     DE_ASSERT(rAccess.getSize().x() == gAccess.getSize().x() || 2 * rAccess.getSize().x() == gAccess.getSize().x());
1598     DE_ASSERT(rAccess.getSize().y() == gAccess.getSize().y() || 2 * rAccess.getSize().y() == gAccess.getSize().y());
1599 
1600     DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1601     DE_ASSERT(chromaFilter == vk::VK_FILTER_NEAREST || chromaFilter == vk::VK_FILTER_LINEAR);
1602     DE_ASSERT(subsampledX || !subsampledY);
1603 
1604     for (size_t ndx = 0; ndx < sts.size(); ndx++)
1605     {
1606         const Vec2 st(sts[ndx]);
1607         Interval bounds[4];
1608 
1609         const Interval u(calculateUV(coordFormat, st[0], gAccess.getSize().x()));
1610         const Interval v(calculateUV(coordFormat, st[1], gAccess.getSize().y()));
1611 
1612         uvBounds[ndx][0] = (float)u.lo();
1613         uvBounds[ndx][1] = (float)u.hi();
1614 
1615         uvBounds[ndx][2] = (float)v.lo();
1616         uvBounds[ndx][3] = (float)v.hi();
1617 
1618         const IVec2 iRange(calculateIJRange(filter, coordFormat, u));
1619         const IVec2 jRange(calculateIJRange(filter, coordFormat, v));
1620 
1621         ijBounds[ndx][0] = iRange[0];
1622         ijBounds[ndx][1] = iRange[1];
1623 
1624         ijBounds[ndx][2] = jRange[0];
1625         ijBounds[ndx][3] = jRange[1];
1626 
1627         for (int j = jRange.x(); j <= jRange.y(); j++)
1628             for (int i = iRange.x(); i <= iRange.y(); i++)
1629             {
1630                 if (filter == vk::VK_FILTER_NEAREST)
1631                 {
1632                     const Interval gValue(
1633                         lookupWrapped(gAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(i, j)));
1634                     const Interval aValue(
1635                         lookupWrapped(aAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(i, j)));
1636 
1637                     if (explicitReconstruction || !(subsampledX || subsampledY))
1638                     {
1639                         Interval rValue, bValue;
1640                         if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1641                         {
1642                             // Reconstruct using nearest if needed, otherwise, just take what's already there.
1643                             const int subI = subsampledX ? i / 2 : i;
1644                             const int subJ = subsampledY ? j / 2 : j;
1645                             rValue         = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1646                                                            IVec2(subI, subJ));
1647                             bValue         = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1648                                                            IVec2(subI, subJ));
1649                         }
1650                         else // vk::VK_FILTER_LINEAR
1651                         {
1652                             if (subsampledY)
1653                             {
1654                                 rValue = reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1655                                                                          xChromaOffset, yChromaOffset, addressModeU,
1656                                                                          addressModeV, rAccess, i, j);
1657                                 bValue = reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1658                                                                          xChromaOffset, yChromaOffset, addressModeU,
1659                                                                          addressModeV, bAccess, i, j);
1660                             }
1661                             else
1662                             {
1663                                 rValue = reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1664                                                                         xChromaOffset, addressModeU, addressModeV,
1665                                                                         rAccess, i, j);
1666                                 bValue = reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1667                                                                         xChromaOffset, addressModeU, addressModeV,
1668                                                                         bAccess, i, j);
1669                             }
1670                         }
1671 
1672                         const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1673                         Interval dstColor[4];
1674 
1675                         convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1676 
1677                         for (size_t compNdx = 0; compNdx < 4; compNdx++)
1678                             bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1679                     }
1680                     else
1681                     {
1682                         const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1683                                                              u);
1684                         const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1685                                                              v);
1686 
1687                         // Reconstructed chroma samples with implicit filtering
1688                         const IVec2 chromaIRange(subsampledX ? calculateIJRange(chromaFilter, coordFormat, chromaU) :
1689                                                                IVec2(i, i));
1690                         const IVec2 chromaJRange(subsampledY ? calculateIJRange(chromaFilter, coordFormat, chromaV) :
1691                                                                IVec2(j, j));
1692 
1693                         for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1694                             for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1695                             {
1696                                 Interval rValue, bValue;
1697 
1698                                 if (chromaFilter == vk::VK_FILTER_NEAREST)
1699                                 {
1700                                     rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1701                                                            IVec2(chromaI, chromaJ));
1702                                     bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1703                                                            IVec2(chromaI, chromaJ));
1704                                 }
1705                                 else // vk::VK_FILTER_LINEAR
1706                                 {
1707                                     const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1708                                     const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1709 
1710                                     rValue =
1711                                         linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1712                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1713                                     bValue =
1714                                         linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1715                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1716                                 }
1717 
1718                                 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1719 
1720                                 Interval dstColor[4];
1721                                 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1722 
1723                                 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1724                                     bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1725                             }
1726                     }
1727                 }
1728                 else // filter == vk::VK_FILTER_LINEAR
1729                 {
1730                     const Interval lumaA(calculateAB(subTexelPrecisionBits, u, i));
1731                     const Interval lumaB(calculateAB(subTexelPrecisionBits, v, j));
1732 
1733                     const Interval gValue(linearSample(gAccess, conversionFormat[1], filteringFormat[1], addressModeU,
1734                                                        addressModeV, IVec2(i, j), lumaA, lumaB));
1735                     const Interval aValue(linearSample(aAccess, conversionFormat[3], filteringFormat[3], addressModeU,
1736                                                        addressModeV, IVec2(i, j), lumaA, lumaB));
1737 
1738                     if (explicitReconstruction || !(subsampledX || subsampledY))
1739                     {
1740                         Interval rValue, bValue;
1741                         if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1742                         {
1743                             rValue = linearInterpolate(
1744                                 filteringFormat[0], lumaA, lumaB,
1745                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1746                                               IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1747                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1748                                               IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1749                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1750                                               IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1751                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1752                                               IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1753                             bValue = linearInterpolate(
1754                                 filteringFormat[2], lumaA, lumaB,
1755                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1756                                               IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1757                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1758                                               IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1759                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1760                                               IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1761                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1762                                               IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1763                         }
1764                         else // vk::VK_FILTER_LINEAR
1765                         {
1766                             if (subsampledY)
1767                             {
1768                                 // Linear, Reconstructed xx chroma samples with explicit linear filtering
1769                                 rValue = linearInterpolate(
1770                                     filteringFormat[0], lumaA, lumaB,
1771                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1772                                                                     xChromaOffset, yChromaOffset, addressModeU,
1773                                                                     addressModeV, rAccess, i, j),
1774                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1775                                                                     xChromaOffset, yChromaOffset, addressModeU,
1776                                                                     addressModeV, rAccess, i + 1, j),
1777                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1778                                                                     xChromaOffset, yChromaOffset, addressModeU,
1779                                                                     addressModeV, rAccess, i, j + 1),
1780                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1781                                                                     xChromaOffset, yChromaOffset, addressModeU,
1782                                                                     addressModeV, rAccess, i + 1, j + 1));
1783                                 bValue = linearInterpolate(
1784                                     filteringFormat[2], lumaA, lumaB,
1785                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1786                                                                     xChromaOffset, yChromaOffset, addressModeU,
1787                                                                     addressModeV, bAccess, i, j),
1788                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1789                                                                     xChromaOffset, yChromaOffset, addressModeU,
1790                                                                     addressModeV, bAccess, i + 1, j),
1791                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1792                                                                     xChromaOffset, yChromaOffset, addressModeU,
1793                                                                     addressModeV, bAccess, i, j + 1),
1794                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1795                                                                     xChromaOffset, yChromaOffset, addressModeU,
1796                                                                     addressModeV, bAccess, i + 1, j + 1));
1797                             }
1798                             else
1799                             {
1800                                 // Linear, Reconstructed x chroma samples with explicit linear filtering
1801                                 rValue = linearInterpolate(
1802                                     filteringFormat[0], lumaA, lumaB,
1803                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1804                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1805                                                                    i, j),
1806                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1807                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1808                                                                    i + 1, j),
1809                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1810                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1811                                                                    i, j + 1),
1812                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1813                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1814                                                                    i + 1, j + 1));
1815                                 bValue = linearInterpolate(
1816                                     filteringFormat[2], lumaA, lumaB,
1817                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1818                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1819                                                                    i, j),
1820                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1821                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1822                                                                    i + 1, j),
1823                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1824                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1825                                                                    i, j + 1),
1826                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1827                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1828                                                                    i + 1, j + 1));
1829                             }
1830                         }
1831 
1832                         const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1833                         Interval dstColor[4];
1834 
1835                         convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1836 
1837                         for (size_t compNdx = 0; compNdx < 4; compNdx++)
1838                             bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1839                     }
1840                     else
1841                     {
1842                         const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1843                                                              u);
1844                         const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1845                                                              v);
1846 
1847                         // TODO: It looks incorrect to ignore the chroma filter here. Is it?
1848                         const IVec2 chromaIRange(calculateNearestIJRange(coordFormat, chromaU));
1849                         const IVec2 chromaJRange(calculateNearestIJRange(coordFormat, chromaV));
1850 
1851                         for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1852                             for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1853                             {
1854                                 Interval rValue, bValue;
1855 
1856                                 if (chromaFilter == vk::VK_FILTER_NEAREST)
1857                                 {
1858                                     rValue = lookupWrapped(rAccess, conversionFormat[1], addressModeU, addressModeV,
1859                                                            IVec2(chromaI, chromaJ));
1860                                     bValue = lookupWrapped(bAccess, conversionFormat[3], addressModeU, addressModeV,
1861                                                            IVec2(chromaI, chromaJ));
1862                                 }
1863                                 else // vk::VK_FILTER_LINEAR
1864                                 {
1865                                     const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1866                                     const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1867 
1868                                     rValue =
1869                                         linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1870                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1871                                     bValue =
1872                                         linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1873                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1874                                 }
1875 
1876                                 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1877                                 Interval dstColor[4];
1878                                 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1879 
1880                                 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1881                                     bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1882                             }
1883                     }
1884                 }
1885             }
1886 
1887         minBounds[ndx] =
1888             Vec4((float)bounds[0].lo(), (float)bounds[1].lo(), (float)bounds[2].lo(), (float)bounds[3].lo());
1889         maxBounds[ndx] =
1890             Vec4((float)bounds[0].hi(), (float)bounds[1].hi(), (float)bounds[2].hi(), (float)bounds[3].hi());
1891     }
1892 }
1893 
1894 } // namespace ycbcr
1895 
1896 } // namespace vkt
1897