1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Google Inc.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief YCbCr Test Utilities
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktYCbCrUtil.hpp"
26
27 #include "vkQueryUtil.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkTypeUtil.hpp"
30 #include "vkCmdUtil.hpp"
31
32 #include "tcuTextureUtil.hpp"
33 #include "deMath.h"
34 #include "tcuFloat.hpp"
35 #include "tcuVector.hpp"
36 #include "tcuVectorUtil.hpp"
37
38 #include "deSTLUtil.hpp"
39 #include "deUniquePtr.hpp"
40
41 #include <limits>
42
43 namespace vkt
44 {
45 namespace ycbcr
46 {
47
48 using namespace vk;
49
50 using de::MovePtr;
51 using std::string;
52 using std::vector;
53 using tcu::FloatFormat;
54 using tcu::Interval;
55 using tcu::IVec2;
56 using tcu::IVec4;
57 using tcu::UVec2;
58 using tcu::UVec4;
59 using tcu::Vec2;
60 using tcu::Vec4;
61
62 // MultiPlaneImageData
63
MultiPlaneImageData(VkFormat format,const UVec2 & size)64 MultiPlaneImageData::MultiPlaneImageData(VkFormat format, const UVec2 &size)
65 : m_format(format)
66 , m_description(getPlanarFormatDescription(format))
67 , m_size(size)
68 {
69 for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
70 m_planeData[planeNdx].resize(
71 getPlaneSizeInBytes(m_description, size, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY));
72 }
73
// Deep-copies another image: format/description/size plus the byte contents of
// every allocated plane.
MultiPlaneImageData::MultiPlaneImageData(const MultiPlaneImageData &other)
    : m_format(other.m_format)
    , m_description(other.m_description)
    , m_size(other.m_size)
{
    uint32_t plane = 0u;

    while (plane < m_description.numPlanes)
    {
        m_planeData[plane] = other.m_planeData[plane];
        ++plane;
    }
}
82
// Trivial destructor; the per-plane std::vector members release their storage.
MultiPlaneImageData::~MultiPlaneImageData(void)
{
}
86
getChannelAccess(uint32_t channelNdx)87 tcu::PixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx)
88 {
89 void *planePtrs[PlanarFormatDescription::MAX_PLANES];
90 uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
91
92 for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
93 {
94 const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
95 planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
96 planePtrs[planeNdx] = &m_planeData[planeNdx][0];
97 }
98
99 return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
100 }
101
getChannelAccess(uint32_t channelNdx) const102 tcu::ConstPixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx) const
103 {
104 const void *planePtrs[PlanarFormatDescription::MAX_PLANES];
105 uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
106
107 for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
108 {
109 const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
110 planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
111 planePtrs[planeNdx] = &m_planeData[planeNdx][0];
112 }
113
114 return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
115 }
116
117 // Misc utilities
118
119 namespace
120 {
121
allocateStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)122 void allocateStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
123 const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
124 vector<AllocationSp> *allocations)
125 {
126 for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
127 {
128 const VkBufferCreateInfo bufferInfo = {
129 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
130 DE_NULL,
131 (VkBufferCreateFlags)0u,
132 (VkDeviceSize)imageData.getPlaneSize(planeNdx),
133 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
134 VK_SHARING_MODE_EXCLUSIVE,
135 0u,
136 (const uint32_t *)DE_NULL,
137 };
138 Move<VkBuffer> buffer(createBuffer(vkd, device, &bufferInfo));
139 MovePtr<Allocation> allocation(allocator.allocate(getBufferMemoryRequirements(vkd, device, *buffer),
140 MemoryRequirement::HostVisible | MemoryRequirement::Any));
141
142 VK_CHECK(vkd.bindBufferMemory(device, *buffer, allocation->getMemory(), allocation->getOffset()));
143
144 buffers->push_back(VkBufferSp(new Unique<VkBuffer>(buffer)));
145 allocations->push_back(AllocationSp(allocation.release()));
146 }
147 }
148
allocateAndWriteStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)149 void allocateAndWriteStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
150 const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
151 vector<AllocationSp> *allocations)
152 {
153 allocateStagingBuffers(vkd, device, allocator, imageData, buffers, allocations);
154
155 for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
156 {
157 deMemcpy((*allocations)[planeNdx]->getHostPtr(), imageData.getPlanePtr(planeNdx),
158 imageData.getPlaneSize(planeNdx));
159 flushMappedMemoryRange(vkd, device, (*allocations)[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
160 }
161 }
162
readStagingBuffers(MultiPlaneImageData * imageData,const DeviceInterface & vkd,VkDevice device,const vector<AllocationSp> & allocations)163 void readStagingBuffers(MultiPlaneImageData *imageData, const DeviceInterface &vkd, VkDevice device,
164 const vector<AllocationSp> &allocations)
165 {
166 for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
167 {
168 invalidateMappedMemoryRange(vkd, device, allocations[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
169 deMemcpy(imageData->getPlanePtr(planeNdx), allocations[planeNdx]->getHostPtr(),
170 imageData->getPlaneSize(planeNdx));
171 }
172 }
173
174 } // namespace
175
checkImageSupport(Context & context,VkFormat format,VkImageCreateFlags createFlags,VkImageTiling tiling)176 void checkImageSupport(Context &context, VkFormat format, VkImageCreateFlags createFlags, VkImageTiling tiling)
177 {
178 const bool disjoint = (createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0;
179 const VkPhysicalDeviceSamplerYcbcrConversionFeatures features = context.getSamplerYcbcrConversionFeatures();
180
181 if (features.samplerYcbcrConversion == VK_FALSE)
182 TCU_THROW(NotSupportedError, "samplerYcbcrConversion is not supported");
183
184 if (disjoint)
185 {
186 context.requireDeviceFunctionality("VK_KHR_bind_memory2");
187 context.requireDeviceFunctionality("VK_KHR_get_memory_requirements2");
188 }
189
190 {
191 const VkFormatProperties formatProperties =
192 getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
193 const VkFormatFeatureFlags featureFlags = tiling == VK_IMAGE_TILING_OPTIMAL ?
194 formatProperties.optimalTilingFeatures :
195 formatProperties.linearTilingFeatures;
196
197 if ((featureFlags &
198 (VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) == 0)
199 TCU_THROW(NotSupportedError, "YCbCr conversion is not supported for format");
200
201 if (disjoint && ((featureFlags & VK_FORMAT_FEATURE_DISJOINT_BIT) == 0))
202 TCU_THROW(NotSupportedError, "Disjoint planes are not supported for format");
203 }
204 }
205
fillRandomNoNaN(de::Random * randomGen,uint8_t * const data,uint32_t size,const vk::VkFormat format)206 void fillRandomNoNaN(de::Random *randomGen, uint8_t *const data, uint32_t size, const vk::VkFormat format)
207 {
208 bool isFloat = false;
209 uint32_t stride = 1;
210
211 switch (format)
212 {
213 case vk::VK_FORMAT_B10G11R11_UFLOAT_PACK32:
214 isFloat = true;
215 stride = 1;
216 break;
217 case vk::VK_FORMAT_R16_SFLOAT:
218 case vk::VK_FORMAT_R16G16_SFLOAT:
219 case vk::VK_FORMAT_R16G16B16_SFLOAT:
220 case vk::VK_FORMAT_R16G16B16A16_SFLOAT:
221 isFloat = true;
222 stride = 2;
223 break;
224 case vk::VK_FORMAT_R32_SFLOAT:
225 case vk::VK_FORMAT_R32G32_SFLOAT:
226 case vk::VK_FORMAT_R32G32B32_SFLOAT:
227 case vk::VK_FORMAT_R32G32B32A32_SFLOAT:
228 isFloat = true;
229 stride = 4;
230 break;
231 case vk::VK_FORMAT_R64_SFLOAT:
232 case vk::VK_FORMAT_R64G64_SFLOAT:
233 case vk::VK_FORMAT_R64G64B64_SFLOAT:
234 case vk::VK_FORMAT_R64G64B64A64_SFLOAT:
235 isFloat = true;
236 stride = 8;
237 break;
238 default:
239 stride = 1;
240 break;
241 }
242
243 if (isFloat)
244 {
245 uint32_t ndx = 0;
246 for (; ndx < size - stride + 1; ndx += stride)
247 {
248 if (stride == 1)
249 {
250 // Set first bit of each channel to 0 to avoid NaNs, only format is B10G11R11
251 const uint8_t mask[] = {0x7F, 0xDF, 0xFB, 0xFF};
252 // Apply mask for both endians
253 data[ndx] = (randomGen->getUint8() & mask[ndx % 4]) & mask[3 - ndx % 4];
254 }
255 else if (stride == 2)
256 {
257 tcu::float16_t *const ptr = reinterpret_cast<tcu::float16_t *>(&data[ndx]);
258 *ptr = tcu::Float16(randomGen->getFloat()).bits();
259 }
260 else if (stride == 4)
261 {
262 float *ptr = reinterpret_cast<float *>(&data[ndx]);
263 *ptr = randomGen->getFloat();
264 }
265 else if (stride == 8)
266 {
267 double *ptr = reinterpret_cast<double *>(&data[ndx]);
268 *ptr = randomGen->getDouble();
269 }
270 }
271 while (ndx < size)
272 {
273 data[ndx] = 0;
274 }
275 }
276 else
277 {
278 for (uint32_t ndx = 0; ndx < size; ++ndx)
279 {
280 data[ndx] = randomGen->getUint8();
281 }
282 }
283 }
284
285 // When noNan is true, fillRandom does not generate NaNs in float formats.
fillRandom(de::Random * randomGen,MultiPlaneImageData * imageData,const vk::VkFormat format,const bool noNan)286 void fillRandom(de::Random *randomGen, MultiPlaneImageData *imageData, const vk::VkFormat format, const bool noNan)
287 {
288 for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
289 {
290 const size_t planeSize = imageData->getPlaneSize(planeNdx);
291 uint8_t *const planePtr = (uint8_t *)imageData->getPlanePtr(planeNdx);
292
293 if (noNan)
294 {
295 fillRandomNoNaN(randomGen, planePtr, (uint32_t)planeSize, format);
296 }
297 else
298 {
299 for (size_t ndx = 0; ndx < planeSize; ++ndx)
300 {
301 planePtr[ndx] = randomGen->getUint8();
302 }
303 }
304 }
305 }
306
fillGradient(MultiPlaneImageData * imageData,const tcu::Vec4 & minVal,const tcu::Vec4 & maxVal)307 void fillGradient(MultiPlaneImageData *imageData, const tcu::Vec4 &minVal, const tcu::Vec4 &maxVal)
308 {
309 const PlanarFormatDescription &formatInfo = imageData->getDescription();
310
311 // \todo [pyry] Optimize: no point in re-rendering source gradient for each channel.
312
313 for (uint32_t channelNdx = 0; channelNdx < 4; channelNdx++)
314 {
315 if (formatInfo.hasChannelNdx(channelNdx))
316 {
317 const tcu::PixelBufferAccess channelAccess = imageData->getChannelAccess(channelNdx);
318 tcu::TextureLevel tmpTexture(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT),
319 channelAccess.getWidth(), channelAccess.getHeight());
320 const tcu::ConstPixelBufferAccess tmpAccess = tmpTexture.getAccess();
321
322 tcu::fillWithComponentGradients(tmpTexture, minVal, maxVal);
323
324 for (int y = 0; y < channelAccess.getHeight(); ++y)
325 for (int x = 0; x < channelAccess.getWidth(); ++x)
326 {
327 channelAccess.setPixel(tcu::Vec4(tmpAccess.getPixel(x, y)[channelNdx]), x, y);
328 }
329 }
330 }
331 }
332
fillZero(MultiPlaneImageData * imageData)333 void fillZero(MultiPlaneImageData *imageData)
334 {
335 for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
336 deMemset(imageData->getPlanePtr(planeNdx), 0, imageData->getPlaneSize(planeNdx));
337 }
338
allocateAndBindImageMemory(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,VkImage image,VkFormat format,VkImageCreateFlags createFlags,vk::MemoryRequirement requirement)339 vector<AllocationSp> allocateAndBindImageMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
340 VkImage image, VkFormat format, VkImageCreateFlags createFlags,
341 vk::MemoryRequirement requirement)
342 {
343 vector<AllocationSp> allocations;
344
345 if ((createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0)
346 {
347 const uint32_t numPlanes = getPlaneCount(format);
348
349 bindImagePlanesMemory(vkd, device, image, numPlanes, allocations, allocator, requirement);
350 }
351 else
352 {
353 const VkMemoryRequirements reqs = getImageMemoryRequirements(vkd, device, image);
354
355 allocations.push_back(AllocationSp(allocator.allocate(reqs, requirement).release()));
356
357 VK_CHECK(vkd.bindImageMemory(device, image, allocations.back()->getMemory(), allocations.back()->getOffset()));
358 }
359
360 return allocations;
361 }
// Uploads the CPU-side plane data in imageData to 'image' through staging
// buffers: each plane is written to its own host-visible buffer, then copied
// with vkCmdCopyBufferToImage. Each plane aspect of the target layer is
// transitioned UNDEFINED -> TRANSFER_DST_OPTIMAL -> finalLayout, with the
// transfer write made visible to 'nextAccess'. Handles both single-plane and
// multi-planar formats (one aspect/copy per plane).
void uploadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
                 VkImage image, const MultiPlaneImageData &imageData, VkAccessFlags nextAccess,
                 VkImageLayout finalLayout, uint32_t arrayLayer)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    vector<VkBufferSp> stagingBuffers;
    vector<AllocationSp> stagingMemory;

    const PlanarFormatDescription &formatDesc = imageData.getDescription();

    // One staging buffer per plane, already filled with the plane contents.
    allocateAndWriteStagingBuffers(vkd, device, allocator, imageData, &stagingBuffers, &stagingMemory);

    beginCommandBuffer(vkd, *cmdBuffer);

    for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
    {
        // Multi-planar images are addressed per plane through plane aspect bits.
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        const VkExtent3D imageExtent = makeExtent3D(imageData.getSize().x(), imageData.getSize().y(), 1u);
        // Plane extent accounts for chroma subsampling divisors.
        const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
        const VkBufferImageCopy copy = {0u, // bufferOffset
                                        0u, // bufferRowLength
                                        0u, // bufferImageHeight
                                        {(VkImageAspectFlags)aspect, 0u, arrayLayer, 1u},
                                        makeOffset3D(0u, 0u, 0u),
                                        planeExtent};

        {
            // Move this plane of the target layer into TRANSFER_DST_OPTIMAL;
            // oldLayout == UNDEFINED, so previous contents are discarded.
            const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                         DE_NULL,
                                                         (VkAccessFlags)0,
                                                         VK_ACCESS_TRANSFER_WRITE_BIT,
                                                         VK_IMAGE_LAYOUT_UNDEFINED,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         VK_QUEUE_FAMILY_IGNORED,
                                                         VK_QUEUE_FAMILY_IGNORED,
                                                         image,
                                                         {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
                                   (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
                                   &preCopyBarrier);
        }

        vkd.cmdCopyBufferToImage(*cmdBuffer, **stagingBuffers[planeNdx], image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                 1u, &copy);

        {
            // Transition to the requested final layout and make the transfer
            // write visible to whatever access the caller will perform next.
            const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                          DE_NULL,
                                                          VK_ACCESS_TRANSFER_WRITE_BIT,
                                                          nextAccess,
                                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                          finalLayout,
                                                          VK_QUEUE_FAMILY_IGNORED,
                                                          VK_QUEUE_FAMILY_IGNORED,
                                                          image,
                                                          {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
                                   (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
                                   &postCopyBarrier);
        }
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
}
437
// Writes the plane data in imageData directly into the image's mapped host
// memory, row by row, using the driver-reported VkSubresourceLayout for each
// plane, then transitions the image PREINITIALIZED -> finalLayout.
// 'allocations' holds one allocation per plane (disjoint) or a single
// allocation for the whole image. NOTE(review): this presumes a linear-tiled,
// host-mappable image (getImageSubresourceLayout + host memcpy) — confirm
// against callers.
void fillImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
                     const std::vector<de::SharedPtr<vk::Allocation>> &allocations,
                     const MultiPlaneImageData &imageData, vk::VkAccessFlags nextAccess, vk::VkImageLayout finalLayout,
                     uint32_t arrayLayer)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const PlanarFormatDescription &formatDesc = imageData.getDescription();

    for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
    {
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        // Disjoint images have one allocation per plane; otherwise all planes
        // live in allocations[0].
        const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
        const size_t planeSize = imageData.getPlaneSize(planeNdx);
        const uint32_t planeH = imageData.getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
        const VkImageSubresource subresource = {
            static_cast<vk::VkImageAspectFlags>(aspect),
            0u,
            arrayLayer,
        };
        VkSubresourceLayout layout;

        vkd.getImageSubresourceLayout(device, image, &subresource, &layout);

        // Copy row by row: the CPU-side plane is tightly packed while the image
        // rows are layout.rowPitch apart.
        for (uint32_t row = 0; row < planeH; ++row)
        {
            const size_t rowSize = planeSize / planeH;
            void *const dstPtr = ((uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
            const void *const srcPtr = ((const uint8_t *)imageData.getPlanePtr(planeNdx)) + row * rowSize;

            deMemcpy(dstPtr, srcPtr, rowSize);
        }
        flushMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
    }

    beginCommandBuffer(vkd, *cmdBuffer);

    {
        // Single whole-image transition PREINITIALIZED -> finalLayout, making
        // the host writes visible to 'nextAccess'.
        const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                      DE_NULL,
                                                      0u,
                                                      nextAccess,
                                                      VK_IMAGE_LAYOUT_PREINITIALIZED,
                                                      finalLayout,
                                                      VK_QUEUE_FAMILY_IGNORED,
                                                      VK_QUEUE_FAMILY_IGNORED,
                                                      image,
                                                      {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, arrayLayer, 1u}};

        vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
                               (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
                               (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
                               &postCopyBarrier);
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
}
500
// Reads the given array layer of 'image' back into imageData: each plane is
// copied to its own staging buffer with vkCmdCopyImageToBuffer (transitioning
// the plane aspect initialLayout -> TRANSFER_SRC_OPTIMAL first), and after the
// queue has finished the staging buffers are copied back to the CPU side.
void downloadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
                   VkImage image, MultiPlaneImageData *imageData, VkAccessFlags prevAccess, VkImageLayout initialLayout,
                   uint32_t baseArrayLayer)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    vector<VkBufferSp> stagingBuffers;
    vector<AllocationSp> stagingMemory;

    const PlanarFormatDescription &formatDesc = imageData->getDescription();

    // One empty host-visible buffer per plane to receive the copies.
    allocateStagingBuffers(vkd, device, allocator, *imageData, &stagingBuffers, &stagingMemory);

    beginCommandBuffer(vkd, *cmdBuffer);

    for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
    {
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        {
            // Make prior writes (prevAccess) visible and move this plane to
            // TRANSFER_SRC_OPTIMAL for the copy.
            const VkImageMemoryBarrier preCopyBarrier = {
                VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                DE_NULL,
                prevAccess,
                VK_ACCESS_TRANSFER_READ_BIT,
                initialLayout,
                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                VK_QUEUE_FAMILY_IGNORED,
                VK_QUEUE_FAMILY_IGNORED,
                image,
                {static_cast<vk::VkImageAspectFlags>(aspect), 0u, 1u, baseArrayLayer, 1u}};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
                                   (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
                                   &preCopyBarrier);
        }
        {
            // Copy the (possibly subsampled) plane extent into the staging buffer.
            const VkExtent3D imageExtent = makeExtent3D(imageData->getSize().x(), imageData->getSize().y(), 1u);
            const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
            const VkBufferImageCopy copy = {0u, // bufferOffset
                                            0u, // bufferRowLength
                                            0u, // bufferImageHeight
                                            {(VkImageAspectFlags)aspect, 0u, baseArrayLayer, 1u},
                                            makeOffset3D(0u, 0u, 0u),
                                            planeExtent};

            vkd.cmdCopyImageToBuffer(*cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                     **stagingBuffers[planeNdx], 1u, &copy);
        }
        {
            // Make the buffer contents visible to subsequent host reads.
            const VkBufferMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
                                                           DE_NULL,
                                                           VK_ACCESS_TRANSFER_WRITE_BIT,
                                                           VK_ACCESS_HOST_READ_BIT,
                                                           VK_QUEUE_FAMILY_IGNORED,
                                                           VK_QUEUE_FAMILY_IGNORED,
                                                           **stagingBuffers[planeNdx],
                                                           0u,
                                                           VK_WHOLE_SIZE};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u,
                                   (const VkMemoryBarrier *)DE_NULL, 1u, &postCopyBarrier, 0u,
                                   (const VkImageMemoryBarrier *)DE_NULL);
        }
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

    // Queue is idle at this point; pull the staging contents into imageData.
    readStagingBuffers(imageData, vkd, device, stagingMemory);
}
577
// Reads the image contents back into imageData directly through mapped host
// memory: first submits a barrier transitioning initialLayout -> GENERAL and
// making prior writes (prevAccess) visible to host reads, then copies each
// plane row by row using the driver-reported VkSubresourceLayout.
// NOTE(review): presumes a linear-tiled, host-mappable image — confirm against
// callers. Only array layer 0 is read (subresource.arrayLayer is 0u).
void readImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
                     const std::vector<de::SharedPtr<vk::Allocation>> &allocations, MultiPlaneImageData *imageData,
                     vk::VkAccessFlags prevAccess, vk::VkImageLayout initialLayout)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const PlanarFormatDescription &formatDesc = imageData->getDescription();

    beginCommandBuffer(vkd, *cmdBuffer);

    {
        // Whole-image transition to GENERAL so the host may read the memory.
        const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                     DE_NULL,
                                                     prevAccess,
                                                     vk::VK_ACCESS_HOST_READ_BIT,
                                                     initialLayout,
                                                     VK_IMAGE_LAYOUT_GENERAL,
                                                     VK_QUEUE_FAMILY_IGNORED,
                                                     VK_QUEUE_FAMILY_IGNORED,
                                                     image,
                                                     {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};

        vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u,
                               (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
                               &preCopyBarrier);
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

    for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
    {
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        // Disjoint images have one allocation per plane; otherwise all planes
        // live in allocations[0].
        const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
        const size_t planeSize = imageData->getPlaneSize(planeNdx);
        const uint32_t planeH = imageData->getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
        const VkImageSubresource subresource = {
            static_cast<vk::VkImageAspectFlags>(aspect),
            0u,
            0u,
        };
        VkSubresourceLayout layout;

        vkd.getImageSubresourceLayout(device, image, &subresource, &layout);

        invalidateMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);

        // Copy row by row: image rows are layout.rowPitch apart, the CPU-side
        // plane is tightly packed.
        for (uint32_t row = 0; row < planeH; ++row)
        {
            const size_t rowSize = planeSize / planeH;
            const void *const srcPtr =
                ((const uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
            void *const dstPtr = ((uint8_t *)imageData->getPlanePtr(planeNdx)) + row * rowSize;

            deMemcpy(dstPtr, srcPtr, rowSize);
        }
    }
}
641
642 // ChannelAccess utilities
643 namespace
644 {
645
//! Sign-extend a value occupying the low 'bits' bits of 'src' to a full 32-bit
//! signed integer.
inline int32_t signExtend(uint32_t src, int bits)
{
    const uint32_t signBit = 1u << (bits - 1);

    // If the sign bit is set, fill all bits above it with ones.
    if ((src & signBit) != 0u)
        src |= ~(signBit - 1u);

    return (int32_t)src;
}
655
// Integer division of a by b, rounding any remainder upwards.
// (Deliberately avoids the (a + b - 1) / b form, which can overflow.)
uint32_t divRoundUp(uint32_t a, uint32_t b)
{
    const uint32_t quotient = a / b;

    return (a % b != 0u) ? quotient + 1u : quotient;
}
663
// \todo Taken from tcuTexture.cpp
// \todo [2011-09-21 pyry] Move to tcutil?
//! Converts a float to integer type T with round-to-nearest-even and
//! saturation to T's representable range.
template <typename T>
inline T convertSatRte(float f)
{
    // \note Doesn't work for 64-bit types
    DE_STATIC_ASSERT(sizeof(T) < sizeof(uint64_t));
    // The round-to-even step below relies on C++'s truncating %, verified here.
    DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));

    int64_t minVal = std::numeric_limits<T>::min();
    int64_t maxVal = std::numeric_limits<T>::max();
    float q = deFloatFrac(f);          // fractional part of f
    int64_t intVal = (int64_t)(f - q); // integer part of f

    // Rounding.
    if (q == 0.5f)
    {
        // Exactly halfway: round to the nearest even integer.
        if (intVal % 2 != 0)
            intVal++;
    }
    else if (q > 0.5f)
        intVal++;
    // else Don't add anything

    // Saturate.
    intVal = de::max(minVal, de::min(maxVal, intVal));

    return (T)intVal;
}
693
694 } // namespace
695
// Wraps raw memory as an accessor for one channel of a (possibly bit-packed)
// image. 'bitPitch' gives the per-axis distance between texels in bits;
// 'bitOffset' locates the first texel's channel. The whole-byte part of the
// offset is folded into the data pointer so m_bitOffset stays in 0..7.
ChannelAccess::ChannelAccess(tcu::TextureChannelClass channelClass, uint8_t channelSize, const tcu::IVec3 &size,
                             const tcu::IVec3 &bitPitch, void *data, uint32_t bitOffset)
    : m_channelClass(channelClass)
    , m_channelSize(channelSize)
    , m_size(size)
    , m_bitPitch(bitPitch)
    , m_data((uint8_t *)data + (bitOffset / 8))
    , m_bitOffset(bitOffset % 8)
{
}
706
// Reads the raw (unconverted) channel bits at texel position 'pos'.
uint32_t ChannelAccess::getChannelUint(const tcu::IVec3 &pos) const
{
    DE_ASSERT(pos[0] < m_size[0]);
    DE_ASSERT(pos[1] < m_size[1]);
    DE_ASSERT(pos[2] < m_size[2]);

    // Bit address of the channel relative to m_data (per-axis bit pitches).
    const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
    const uint8_t *const firstByte = ((const uint8_t *)m_data) + (bitOffset / 8);
    // Number of bytes the possibly unaligned channel spans from firstByte.
    // NOTE(review): offset up to 7 plus a 32-bit channel would span 5 bytes and
    // overflow the 4-byte 'bits' below — presumably 32-bit channels are always
    // byte-aligned here; confirm against callers.
    const uint32_t byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
    const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
    const uint32_t offset = bitOffset % 8;
    uint32_t bits = 0u;

    deMemcpy(&bits, firstByte, byteCount);

    // Shift the channel down to bit 0 and mask off neighboring channels.
    return (bits >> offset) & mask;
}
724
// Writes the low m_channelSize bits of 'x' into the channel at texel 'pos',
// leaving the surrounding bits of the shared bytes untouched.
void ChannelAccess::setChannel(const tcu::IVec3 &pos, uint32_t x)
{
    DE_ASSERT(pos[0] < m_size[0]);
    DE_ASSERT(pos[1] < m_size[1]);
    DE_ASSERT(pos[2] < m_size[2]);

    // Bit address of the channel relative to m_data (per-axis bit pitches).
    const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
    uint8_t *const firstByte = ((uint8_t *)m_data) + (bitOffset / 8);
    // Number of bytes the possibly unaligned channel spans from firstByte.
    const uint32_t byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
    const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
    const uint32_t offset = bitOffset % 8;

    // New channel bits, positioned at their in-byte offset.
    const uint32_t bits = (x & mask) << offset;
    uint32_t oldBits = 0;

    // Read-modify-write so bits belonging to neighboring channels survive.
    deMemcpy(&oldBits, firstByte, byteCount);

    {
        const uint32_t newBits = bits | (oldBits & (~(mask << offset)));

        deMemcpy(firstByte, &newBits, byteCount);
    }
}
748
// Reads the channel at 'pos' and converts the raw bits to float according to
// the channel class (normalized, integer, or 32-bit float).
float ChannelAccess::getChannel(const tcu::IVec3 &pos) const
{
    const uint32_t bits(getChannelUint(pos));

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
        // Unsigned normalized: bits / (2^channelSize - 1).
        return (float)bits / (float)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        return (float)bits;

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
        // Signed normalized: sign-extend, divide by (2^(size-1) - 1), and clamp
        // the most negative code to -1.0.
        return de::max(-1.0f, (float)signExtend(bits, m_channelSize) / (float)((0x1u << (m_channelSize - 1u)) - 1u));

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
        return (float)signExtend(bits, m_channelSize);

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        // Only 32-bit IEEE floats are supported; reinterpret the raw bits.
        if (m_channelSize == 32)
            return tcu::Float32(bits).asFloat();
        else
        {
            DE_FATAL("Float type not supported");
            return -1.0f;
        }

    default:
        DE_FATAL("Unknown texture channel class");
        return -1.0f;
    }
}
781
// Interval variant of getChannel(): converts the channel value at 'pos' using
// the given conversion precision, returning an interval that bounds the result
// under outward rounding at each arithmetic step.
tcu::Interval ChannelAccess::getChannel(const tcu::FloatFormat &conversionFormat, const tcu::IVec3 &pos) const
{
    const uint32_t bits(getChannelUint(pos));

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
        // Unsigned normalized: bits / (2^channelSize - 1), each step rounded out.
        return conversionFormat.roundOut(
            conversionFormat.roundOut((double)bits, false) /
                conversionFormat.roundOut((double)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u)),
                                          false),
            false);

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        return conversionFormat.roundOut((double)bits, false);

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    {
        // Signed normalized: sign-extend and divide, then clamp both interval
        // endpoints to no less than -1.0.
        const tcu::Interval result(conversionFormat.roundOut(
            conversionFormat.roundOut((double)signExtend(bits, m_channelSize), false) /
                conversionFormat.roundOut((double)((0x1u << (m_channelSize - 1u)) - 1u), false),
            false));

        return tcu::Interval(de::max(-1.0, result.lo()), de::max(-1.0, result.hi()));
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
        return conversionFormat.roundOut((double)signExtend(bits, m_channelSize), false);

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        // Only 32-bit IEEE floats are supported; reinterpret the raw bits.
        if (m_channelSize == 32)
            return conversionFormat.roundOut(tcu::Float32(bits).asFloat(), false);
        else
        {
            DE_FATAL("Float type not supported");
            return tcu::Interval();
        }

    default:
        DE_FATAL("Unknown texture channel class");
        return tcu::Interval();
    }
}
825
// Quantize the float value 'x' according to the channel class and bit size,
// then store it at 'pos' via the raw setChannel(pos, uint32_t) overload.
void ChannelAccess::setChannel(const tcu::IVec3 &pos, float x)
{
    DE_ASSERT(pos[0] < m_size[0]);
    DE_ASSERT(pos[1] < m_size[1]);
    DE_ASSERT(pos[2] < m_size[2]);

    // Bit mask covering the channel; equals the maximum unsigned channel value.
    const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
    {
        // UNORM: round-to-even of x * (2^size - 1), clamped to the mask.
        const uint32_t maxValue(mask);
        const uint32_t value(de::min(maxValue, (uint32_t)convertSatRte<uint32_t>(x * (float)maxValue)));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    {
        // SNORM: scale by 2^(size-1) - 1 and clamp to the symmetric range
        // [-range, range]; the bit pattern is stored as unsigned.
        const int32_t range((0x1u << (m_channelSize - 1u)) - 1u);
        const uint32_t value((uint32_t)de::clamp<int32_t>(convertSatRte<int32_t>(x * (float)range), -range, range));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
    {
        // UINT: truncate toward zero and clamp to the representable maximum.
        const uint32_t maxValue(mask);
        const uint32_t value(de::min(maxValue, (uint32_t)x));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
    {
        // SINT: truncate and clamp to [-2^(size-1), 2^(size-1) - 1].
        const int32_t minValue(-(int32_t)(1u << (m_channelSize - 1u)));
        const int32_t maxValue((int32_t)((1u << (m_channelSize - 1u)) - 1u));
        const uint32_t value((uint32_t)de::clamp((int32_t)x, minValue, maxValue));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
    {
        // Only 32-bit IEEE floats are handled: store the raw bit pattern.
        if (m_channelSize == 32)
        {
            const uint32_t value = tcu::Float32(x).bits();
            setChannel(pos, value);
        }
        else
            DE_FATAL("Float type not supported");
        break;
    }

    default:
        DE_FATAL("Unknown texture channel class");
    }
}
885
getChannelAccess(MultiPlaneImageData & data,const vk::PlanarFormatDescription & formatInfo,const UVec2 & size,int channelNdx)886 ChannelAccess getChannelAccess(MultiPlaneImageData &data, const vk::PlanarFormatDescription &formatInfo,
887 const UVec2 &size, int channelNdx)
888 {
889 DE_ASSERT(formatInfo.hasChannelNdx(channelNdx));
890
891 const uint32_t planeNdx = formatInfo.channels[channelNdx].planeNdx;
892 const uint32_t valueOffsetBits = formatInfo.channels[channelNdx].offsetBits;
893 const uint32_t pixelStrideBytes = formatInfo.channels[channelNdx].strideBytes;
894 const uint32_t pixelStrideBits = pixelStrideBytes * 8;
895 const uint8_t sizeBits = formatInfo.channels[channelNdx].sizeBits;
896
897 DE_ASSERT(size.x() % (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor) == 0);
898 DE_ASSERT(size.y() % (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor) == 0);
899
900 uint32_t accessWidth = size.x() / (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor);
901 const uint32_t accessHeight = size.y() / (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor);
902 const uint32_t elementSizeBytes = formatInfo.planes[planeNdx].elementSizeBytes;
903 const uint32_t rowPitch = formatInfo.planes[planeNdx].elementSizeBytes * accessWidth;
904 const uint32_t rowPitchBits = rowPitch * 8;
905
906 if (pixelStrideBytes != elementSizeBytes)
907 {
908 DE_ASSERT(elementSizeBytes % pixelStrideBytes == 0);
909 accessWidth *= elementSizeBytes / pixelStrideBytes;
910 }
911
912 return ChannelAccess((tcu::TextureChannelClass)formatInfo.channels[channelNdx].type, sizeBits,
913 tcu::IVec3(accessWidth, accessHeight, 1u),
914 tcu::IVec3((int)pixelStrideBits, (int)rowPitchBits, 0), data.getPlanePtr(planeNdx),
915 (uint32_t)valueOffsetBits);
916 }
917
isXChromaSubsampled(vk::VkFormat format)918 bool isXChromaSubsampled(vk::VkFormat format)
919 {
920 switch (format)
921 {
922 case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
923 case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
924 case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
925 case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
926 case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
927 case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
928 case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
929 case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
930 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
931 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
932 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
933 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
934 case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
935 case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
936 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
937 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
938 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
939 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
940 case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
941 case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
942 case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
943 case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
944 case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
945 case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
946 return true;
947
948 default:
949 return false;
950 }
951 }
952
isYChromaSubsampled(vk::VkFormat format)953 bool isYChromaSubsampled(vk::VkFormat format)
954 {
955 switch (format)
956 {
957 case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
958 case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
959 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
960 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
961 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
962 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
963 case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
964 case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
965 return true;
966
967 default:
968 return false;
969 }
970 }
971
areLsb6BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)972 bool areLsb6BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
973 {
974 if ((srcFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) ||
975 (srcFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
976 (dstFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
977 (srcFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
978 (dstFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
979 (srcFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
980 (dstFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
981 (srcFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
982 (dstFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
983 (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
984 (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
985 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
986 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
987 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
988 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
989 (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
990 (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
991 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16) ||
992 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16))
993 {
994 return true;
995 }
996
997 return false;
998 }
999
areLsb4BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1000 bool areLsb4BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1001 {
1002 if ((srcFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) ||
1003 (srcFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1004 (dstFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1005 (srcFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1006 (dstFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1007 (srcFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1008 (dstFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1009 (srcFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1010 (dstFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1011 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1012 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1013 (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1014 (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1015 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1016 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1017 (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1018 (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1019 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16) ||
1020 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16))
1021 {
1022 return true;
1023 }
1024
1025 return false;
1026 }
1027
1028 // \note Used for range expansion
// Return per-component bit depth (R, G, B, A) for a Y'CbCr format; falls back
// to tcu's generic bit-depth query for non-YCbCr formats.
tcu::UVec4 getYCbCrBitDepth(vk::VkFormat format)
{
    switch (format)
    {
    case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
    case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
        return tcu::UVec4(8, 8, 8, 0);

    case vk::VK_FORMAT_R10X6_UNORM_PACK16:
        return tcu::UVec4(10, 0, 0, 0);

    case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
        return tcu::UVec4(10, 10, 0, 0);

    case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
        return tcu::UVec4(10, 10, 10, 10);

    case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
        return tcu::UVec4(10, 10, 10, 0);

    case vk::VK_FORMAT_R12X4_UNORM_PACK16:
        return tcu::UVec4(12, 0, 0, 0);

    case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
        return tcu::UVec4(12, 12, 0, 0);

    // NOTE(review): this group reports 12-bit alpha even for the 2/3-plane
    // formats that have no alpha channel (unlike the 8/10/16-bit groups,
    // which report 0) — presumably harmless to callers; confirm alpha depth
    // is ignored for alpha-less formats.
    case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
        return tcu::UVec4(12, 12, 12, 12);

    case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
    case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
        return tcu::UVec4(16, 16, 16, 0);

    default:
        return tcu::getTextureFormatBitDepth(vk::mapVkFormat(format)).cast<uint32_t>();
    }
}
1093
getPrecision(VkFormat format)1094 std::vector<tcu::FloatFormat> getPrecision(VkFormat format)
1095 {
1096 std::vector<FloatFormat> floatFormats;
1097 UVec4 channelDepth = getYCbCrBitDepth(format);
1098
1099 for (uint32_t channelIdx = 0; channelIdx < 4; channelIdx++)
1100 floatFormats.push_back(tcu::FloatFormat(0, 0, channelDepth[channelIdx], false, tcu::YES));
1101
1102 return floatFormats;
1103 }
1104
// Return the number of color channels in 'format'.
// Note: the error path returns -1, which wraps to 0xFFFFFFFFu in the unsigned
// return type; it is only reachable after DE_FATAL (release builds).
uint32_t getYCbCrFormatChannelCount(vk::VkFormat format)
{
    switch (format)
    {
    case vk::VK_FORMAT_A1R5G5B5_UNORM_PACK16:
    case vk::VK_FORMAT_A2B10G10R10_UNORM_PACK32:
    case vk::VK_FORMAT_A2R10G10B10_UNORM_PACK32:
    case vk::VK_FORMAT_A8B8G8R8_UNORM_PACK32:
    case vk::VK_FORMAT_B4G4R4A4_UNORM_PACK16:
    case vk::VK_FORMAT_B5G5R5A1_UNORM_PACK16:
    case vk::VK_FORMAT_B8G8R8A8_UNORM:
    case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
    case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
    case vk::VK_FORMAT_R16G16B16A16_UNORM:
    case vk::VK_FORMAT_R4G4B4A4_UNORM_PACK16:
    case vk::VK_FORMAT_R5G5B5A1_UNORM_PACK16:
    case vk::VK_FORMAT_R8G8B8A8_UNORM:
        return 4;

    case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
    case vk::VK_FORMAT_B5G6R5_UNORM_PACK16:
    case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
    case vk::VK_FORMAT_B8G8R8_UNORM:
    case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
    case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
    case vk::VK_FORMAT_R16G16B16_UNORM:
    case vk::VK_FORMAT_R5G6B5_UNORM_PACK16:
    case vk::VK_FORMAT_R8G8B8_UNORM:
        return 3;

    case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
    case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
        return 2;

    case vk::VK_FORMAT_R10X6_UNORM_PACK16:
    case vk::VK_FORMAT_R12X4_UNORM_PACK16:
        return 1;

    default:
        DE_FATAL("Unknown number of channels");
        return -1;
    }
}
1176
1177 // YCbCr color conversion utilities
1178 namespace
1179 {
1180
// Range-expand a normalized chroma sample towards [-0.5, 0.5] per the Vulkan
// spec's sampler Y'CbCr range expansion rules:
//   full range:   c - 2^(n-1) / (2^n - 1)
//   narrow range: (c * (2^n - 1) - 128 * 2^(n-8)) / (224 * 2^(n-8))
// where n = 'bits'. Every step is rounded outward in 'conversionFormat' so
// the returned interval is conservative. The narrow-range path assumes
// bits >= 8 (bits - 8u would wrap otherwise).
tcu::Interval rangeExpandChroma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
                                const uint32_t bits, const tcu::Interval &sample)
{
    const uint32_t values(0x1u << bits);

    switch (range)
    {
    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
        return conversionFormat.roundOut(
            sample - conversionFormat.roundOut(
                         tcu::Interval((double)(0x1u << (bits - 1u)) / (double)((0x1u << bits) - 1u)), false),
            false);

    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
    {
        const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
        const tcu::Interval dividend(
            conversionFormat.roundOut(a - tcu::Interval((double)(128u * (0x1u << (bits - 8u)))), false));
        const tcu::Interval divisor((double)(224u * (0x1u << (bits - 8u))));
        const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));

        return result;
    }

    default:
        DE_FATAL("Unknown YCbCrRange");
        return tcu::Interval();
    }
}
1210
// Range-expand a normalized luma sample towards [0, 1] per the Vulkan spec:
//   full range:   y (unchanged, just rounded)
//   narrow range: (y * (2^n - 1) - 16 * 2^(n-8)) / (219 * 2^(n-8))
// where n = 'bits'. Rounded outward in 'conversionFormat'; the narrow-range
// path assumes bits >= 8.
tcu::Interval rangeExpandLuma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
                              const uint32_t bits, const tcu::Interval &sample)
{
    const uint32_t values(0x1u << bits);

    switch (range)
    {
    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
        return conversionFormat.roundOut(sample, false);

    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
    {
        const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
        const tcu::Interval dividend(
            conversionFormat.roundOut(a - tcu::Interval((double)(16u * (0x1u << (bits - 8u)))), false));
        const tcu::Interval divisor((double)(219u * (0x1u << (bits - 8u))));
        const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));

        return result;
    }

    default:
        DE_FATAL("Unknown YCbCrRange");
        return tcu::Interval();
    }
}
1237
// Widen 'x' to include the clamp bound on each side it crosses. The interval
// is extended (union with the bound) rather than truncated, so both the
// clamped and the unclamped implementation result remain inside it.
tcu::Interval clampMaybe(const tcu::Interval &x, double min, double max)
{
    DE_ASSERT(min <= max);

    tcu::Interval clamped = x;

    if (x.lo() < min)
        clamped = clamped | tcu::Interval(min);

    if (x.hi() > max)
        clamped = clamped | tcu::Interval(max);

    return clamped;
}
1252
// Convert a sampled Y'CbCr color to RGBA as per-channel intervals of allowed
// values. Channels use Vulkan component order: input[0] = Cr (R), input[1] =
// Y' (G), input[2] = Cb (B), input[3] = alpha. 'conversionFormat' and
// 'bitDepth' are indexed per channel; each arithmetic step is rounded outward
// so the output bounds are conservative. The nesting of roundOut calls
// mirrors the operation order in the Vulkan "Sampler Y'CbCr conversion"
// chapter and must not be reordered.
void convertColor(vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
                  const vector<tcu::FloatFormat> &conversionFormat, const tcu::UVec4 &bitDepth,
                  const tcu::Interval input[4], tcu::Interval output[4])
{
    switch (colorModel)
    {
    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
    {
        // Pass-through: no range expansion, no model conversion.
        for (size_t ndx = 0; ndx < 4; ndx++)
            output[ndx] = input[ndx];
        break;
    }

    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
    {
        // Range expansion only: chroma to [-0.5, 0.5], luma to [0, 1].
        output[0] = clampMaybe(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]), -0.5, 0.5);
        output[1] = clampMaybe(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]), 0.0, 1.0);
        output[2] = clampMaybe(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]), -0.5, 0.5);
        output[3] = input[3];
        break;
    }

    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
    {
        // Range-expand, then apply the model's matrix: R = Y + Kr'*Cr,
        // G = Y - a*Cb - b*Cr, B = Y + Kb'*Cb, with standard-specific constants.
        const tcu::Interval y(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]));
        const tcu::Interval cr(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]));
        const tcu::Interval cb(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]));

        const tcu::Interval yClamped(clampMaybe(y, 0.0, 1.0));
        const tcu::Interval crClamped(clampMaybe(cr, -0.5, 0.5));
        const tcu::Interval cbClamped(clampMaybe(cb, -0.5, 0.5));

        if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601)
        {
            // BT.601: Kr = 0.299, Kb = 0.114 (hence 1.402, 1.772 and the G terms).
            output[0] =
                conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.402 * crClamped, false), false);
            output[1] = conversionFormat[1].roundOut(
                conversionFormat[1].roundOut(
                    yClamped - conversionFormat[1].roundOut((0.202008 / 0.587) * cbClamped, false), false) -
                    conversionFormat[1].roundOut((0.419198 / 0.587) * crClamped, false),
                false);
            output[2] =
                conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.772 * cbClamped, false), false);
        }
        else if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709)
        {
            // BT.709: Kr = 0.2126, Kb = 0.0722.
            output[0] =
                conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.5748 * crClamped, false), false);
            output[1] = conversionFormat[1].roundOut(
                conversionFormat[1].roundOut(
                    yClamped - conversionFormat[1].roundOut((0.13397432 / 0.7152) * cbClamped, false), false) -
                    conversionFormat[1].roundOut((0.33480248 / 0.7152) * crClamped, false),
                false);
            output[2] =
                conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8556 * cbClamped, false), false);
        }
        else
        {
            // BT.2020: Kr = 0.2627, Kb = 0.0593. Note the coefficient ratios are
            // themselves rounded here, unlike the 601/709 branches above.
            output[0] =
                conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.4746 * crClamped, false), false);
            output[1] = conversionFormat[1].roundOut(
                conversionFormat[1].roundOut(
                    yClamped - conversionFormat[1].roundOut(
                                   conversionFormat[1].roundOut(0.11156702 / 0.6780, false) * cbClamped, false),
                    false) -
                    conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.38737742 / 0.6780, false) * crClamped,
                                                 false),
                false);
            output[2] =
                conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8814 * cbClamped, false), false);
        }
        output[3] = input[3];
        break;
    }

    default:
        DE_FATAL("Unknown YCbCrModel");
    }

    // All models except YCBCR_IDENTITY produce RGB in [0, 1]; widen to include
    // the clamp bounds where the computed interval crosses them.
    if (colorModel != vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
    {
        for (int ndx = 0; ndx < 3; ndx++)
            output[ndx] = clampMaybe(output[ndx], 0.0, 1.0);
    }
}
1340
// Reflect negative coordinates back into the non-negative range:
// -1 -> 0, -2 -> 1, -3 -> 2, ...; non-negative values pass through.
int mirror(int coord)
{
    if (coord >= 0)
        return coord;

    return -(1 + coord);
}
1345
// Mathematical modulo: unlike C++'s '%', the result is in [0, b) for b > 0
// even when 'a' is negative.
int imod(int a, int b)
{
    const int r = a % b;
    return (r < 0) ? r + b : r;
}
1351
// Fractional part of an interval. An interval at least one unit wide wraps
// through every fractional value, so it maps to the full [0, 1] range.
tcu::Interval frac(const tcu::Interval &x)
{
    if (x.hi() - x.lo() >= 1.0)
        return tcu::Interval(0.0, 1.0);

    return tcu::Interval(deFrac(x.lo()), deFrac(x.hi()));
}
1363
// Map a normalized texture coordinate interval to unnormalized texel space
// (st * size), rounding each step outward in 'coordFormat'.
tcu::Interval calculateUV(const tcu::FloatFormat &coordFormat, const tcu::Interval &st, const int size)
{
    const tcu::Interval roundedST(coordFormat.roundOut(st, false));

    return coordFormat.roundOut(roundedST * tcu::Interval((double)size), false);
}
1368
// Range of texel indices a nearest-filter lookup may select for 'uv',
// widened by one ULP on each side to cover rounding differences.
tcu::IVec2 calculateNearestIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
{
    const tcu::Interval centered(coordFormat.roundOut(coordFormat.roundOut(uv, false) - tcu::Interval(0.5), false));
    const int minIJ = deRoundToInt32(centered.lo() - coordFormat.ulp(centered.lo(), 1));
    const int maxIJ = deRoundToInt32(centered.hi() + coordFormat.ulp(centered.hi(), 1));

    return tcu::IVec2(minIJ, maxIJ);
}
1376
1377 // Calculate range of pixel coordinates that can be used as lower coordinate for linear sampling
tcu::IVec2 calculateLinearIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
{
    // The lower tap of a linear fetch is floor(uv - 0.5).
    const tcu::Interval shifted(coordFormat.roundOut(uv - tcu::Interval(0.5), false));

    return tcu::IVec2(deFloorToInt32(shifted.lo()), deFloorToInt32(shifted.hi()));
}
1384
calculateIJRange(vk::VkFilter filter,const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1385 tcu::IVec2 calculateIJRange(vk::VkFilter filter, const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
1386 {
1387 DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1388 return (filter == vk::VK_FILTER_LINEAR) ? calculateLinearIJRange(coordFormat, uv) :
1389 calculateNearestIJRange(coordFormat, uv);
1390 }
1391
// Interpolation weight range for the lower tap 'ij': the fractional offset of
// (uv - 0.5) within the texel, snapped outward to the implementation's
// sub-texel precision grid and clamped to [0, 1].
tcu::Interval calculateAB(const uint32_t subTexelPrecisionBits, const tcu::Interval &uv, int ij)
{
    const uint32_t gridSize = 0x1u << subTexelPrecisionBits;
    const tcu::Interval texelSpan((double)ij, (double)(ij + 1));
    const tcu::Interval weight(frac((uv - 0.5) & texelSpan));
    const tcu::Interval onGrid(weight * tcu::Interval(gridSize));
    const double weightLo = de::max(deFloor(onGrid.lo()) / gridSize, 0.0);
    const double weightHi = de::min(deCeil(onGrid.hi()) / gridSize, 1.0);

    return tcu::Interval(weightLo, weightHi);
}
1402
// Fetch the channel value at 'coord' after applying the sampler address modes
// to each axis, returned as a conservative interval.
tcu::Interval lookupWrapped(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                            vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
                            const tcu::IVec2 &coord)
{
    const int wrappedX = wrap(addressModeU, coord.x(), access.getSize().x());
    const int wrappedY = wrap(addressModeV, coord.y(), access.getSize().y());
    tcu::Interval result = access.getChannel(conversionFormat, tcu::IVec3(wrappedX, wrappedY, 0));

    // Expand range for 10-bit conversions to +/-1.0 ULP. The second extension
    // deliberately uses the already-widened length, as in the original.
    if (conversionFormat.getFractionBits() == 10)
    {
        result |= result.lo() - result.length() / 2.0;
        result |= result.hi() + result.length() / 2.0;
    }

    return result;
}
1420
// Bilinear blend of four taps with weights derived from (a, b); every
// arithmetic step is rounded outward in 'filteringFormat' so the result is a
// conservative interval. Accumulation order matches hardware tap order.
tcu::Interval linearInterpolate(const tcu::FloatFormat &filteringFormat, const tcu::Interval &a, const tcu::Interval &b,
                                const tcu::Interval &p00, const tcu::Interval &p10, const tcu::Interval &p01,
                                const tcu::Interval &p11)
{
    const tcu::Interval taps[4] = {p00, p10, p01, p11};
    tcu::Interval acc(0.0);

    for (size_t tapNdx = 0; tapNdx < 4; tapNdx++)
    {
        const tcu::Interval wx(filteringFormat.roundOut((tapNdx % 2) == 0 ? (1.0 - a) : a, false));
        const tcu::Interval wy(filteringFormat.roundOut((tapNdx / 2) == 0 ? (1.0 - b) : b, false));
        const tcu::Interval w(filteringFormat.roundOut(wx * wy, false));

        acc = filteringFormat.roundOut(acc + filteringFormat.roundOut(taps[tapNdx] * w, false), false);
    }

    return acc;
}
1439
// Map a luma-space coordinate to chroma space for implicit reconstruction:
// cosited-even samples need the +0.5 shift before halving, midpoint ones do not.
tcu::Interval calculateImplicitChromaUV(const tcu::FloatFormat &coordFormat, vk::VkChromaLocation offset,
                                        const tcu::Interval &uv)
{
    const tcu::Interval base((offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN) ? coordFormat.roundOut(uv + 0.5, false) :
                                                                               uv);

    return coordFormat.roundOut(0.5 * base, false);
}
1448
// Linear (bilinear) sample at the 2x2 texel neighborhood whose lower corner is
// 'coord', wrapping out-of-range coordinates with the given address modes.
tcu::Interval linearSample(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                           const tcu::FloatFormat &filteringFormat, vk::VkSamplerAddressMode addressModeU,
                           vk::VkSamplerAddressMode addressModeV, const tcu::IVec2 &coord, const tcu::Interval &a,
                           const tcu::Interval &b)
{
    const tcu::Interval tap00(lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord));
    const tcu::Interval tap10(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 0)));
    const tcu::Interval tap01(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 1)));
    const tcu::Interval tap11(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 1)));

    return linearInterpolate(filteringFormat, a, b, tap00, tap10, tap01, tap11);
}
1461
// Reconstruct a chroma value at luma coordinate (i, j) for a format with
// X-only chroma subsampling, using linear reconstruction between the two
// horizontally adjacent chroma samples.
tcu::Interval reconstructLinearXChromaSample(const tcu::FloatFormat &filteringFormat,
                                             const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation offset,
                                             vk::VkSamplerAddressMode addressModeU,
                                             vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                             int j)
{
    // Index of the left chroma sample: cosited-even samples align with even
    // luma columns; midpoint samples sit between columns (hence -1 for even i).
    const int subI = offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? divFloor(i, 2) :
                                                                     (i % 2 == 0 ? divFloor(i, 2) - 1 : divFloor(i, 2));
    // 'a' weights the sample at subI; (1 - a) weights the one at subI + 1.
    const double a =
        offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? (i % 2 == 0 ? 0.0 : 0.5) : (i % 2 == 0 ? 0.25 : 0.75);

    const tcu::Interval A(filteringFormat.roundOut(
        a * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI, j)), false));
    const tcu::Interval B(filteringFormat.roundOut(
        (1.0 - a) * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI + 1, j)),
        false));
    return filteringFormat.roundOut(A + B, false);
}
1480
// Reconstruct a chroma value at luma coordinate (i, j) for a format subsampled
// in both X and Y, via a bilinear blend of the 2x2 chroma neighborhood.
tcu::Interval reconstructLinearXYChromaSample(const tcu::FloatFormat &filteringFormat,
                                              const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation xOffset,
                                              vk::VkChromaLocation yOffset, vk::VkSamplerAddressMode addressModeU,
                                              vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                              int j)
{
    // Lower-left chroma sample index per axis; cosited-even aligns with even
    // luma rows/columns, midpoint sits between them (hence -1 for even coords).
    const int subI = xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ?
                         divFloor(i, 2) :
                         (i % 2 == 0 ? divFloor(i, 2) - 1 : divFloor(i, 2));
    const int subJ = yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ?
                         divFloor(j, 2) :
                         (j % 2 == 0 ? divFloor(j, 2) - 1 : divFloor(j, 2));

    // Per-axis interpolation weights passed through to linearSample.
    const double a =
        xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? (i % 2 == 0 ? 0.0 : 0.5) : (i % 2 == 0 ? 0.25 : 0.75);
    const double b =
        yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? (j % 2 == 0 ? 0.0 : 0.5) : (j % 2 == 0 ? 0.25 : 0.75);

    return linearSample(access, conversionFormat, filteringFormat, addressModeU, addressModeV, tcu::IVec2(subI, subJ),
                        a, b);
}
1502
swizzle(vk::VkComponentSwizzle swizzle,const ChannelAccess & identityPlane,const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane)1503 const ChannelAccess &swizzle(vk::VkComponentSwizzle swizzle, const ChannelAccess &identityPlane,
1504 const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1505 const ChannelAccess &aPlane)
1506 {
1507 switch (swizzle)
1508 {
1509 case vk::VK_COMPONENT_SWIZZLE_IDENTITY:
1510 return identityPlane;
1511 case vk::VK_COMPONENT_SWIZZLE_R:
1512 return rPlane;
1513 case vk::VK_COMPONENT_SWIZZLE_G:
1514 return gPlane;
1515 case vk::VK_COMPONENT_SWIZZLE_B:
1516 return bPlane;
1517 case vk::VK_COMPONENT_SWIZZLE_A:
1518 return aPlane;
1519
1520 default:
1521 DE_FATAL("Unsupported swizzle");
1522 return identityPlane;
1523 }
1524 }
1525
1526 } // namespace
1527
wrap(vk::VkSamplerAddressMode addressMode,int coord,int size)1528 int wrap(vk::VkSamplerAddressMode addressMode, int coord, int size)
1529 {
1530 switch (addressMode)
1531 {
1532 case vk::VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1533 return (size - 1) - mirror(imod(coord, 2 * size) - size);
1534
1535 case vk::VK_SAMPLER_ADDRESS_MODE_REPEAT:
1536 return imod(coord, size);
1537
1538 case vk::VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1539 return de::clamp(coord, 0, size - 1);
1540
1541 case vk::VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1542 return de::clamp(mirror(coord), 0, size - 1);
1543
1544 default:
1545 DE_FATAL("Unknown wrap mode");
1546 return ~0;
1547 }
1548 }
1549
// Integer division rounding toward negative infinity (floor division).
//
// The previous implementation only handled positive divisors correctly
// (e.g. divFloor(7, -2) returned -3 instead of floor(-3.5) == -4). This
// version is correct for any sign combination and is identical to the old
// behavior for the positive divisors used throughout this file.
int divFloor(int a, int b)
{
    int q = a / b;
    // C++ '/' truncates toward zero; step down one when there is a nonzero
    // remainder and the operands have opposite signs.
    if ((a % b != 0) && ((a < 0) != (b < 0)))
        --q;
    return q;
}
1559
calculateBounds(const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane,const UVec4 & bitDepth,const vector<Vec2> & sts,const vector<FloatFormat> & filteringFormat,const vector<FloatFormat> & conversionFormat,const uint32_t subTexelPrecisionBits,vk::VkFilter filter,vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,vk::VkFilter chromaFilter,vk::VkChromaLocation xChromaOffset,vk::VkChromaLocation yChromaOffset,const vk::VkComponentMapping & componentMapping,bool explicitReconstruction,vk::VkSamplerAddressMode addressModeU,vk::VkSamplerAddressMode addressModeV,std::vector<Vec4> & minBounds,std::vector<Vec4> & maxBounds,std::vector<Vec4> & uvBounds,std::vector<IVec4> & ijBounds)1560 void calculateBounds(const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1561 const ChannelAccess &aPlane, const UVec4 &bitDepth, const vector<Vec2> &sts,
1562 const vector<FloatFormat> &filteringFormat, const vector<FloatFormat> &conversionFormat,
1563 const uint32_t subTexelPrecisionBits, vk::VkFilter filter,
1564 vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
1565 vk::VkFilter chromaFilter, vk::VkChromaLocation xChromaOffset, vk::VkChromaLocation yChromaOffset,
1566 const vk::VkComponentMapping &componentMapping, bool explicitReconstruction,
1567 vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
1568 std::vector<Vec4> &minBounds, std::vector<Vec4> &maxBounds, std::vector<Vec4> &uvBounds,
1569 std::vector<IVec4> &ijBounds)
1570 {
1571 const FloatFormat highp(-126, 127, 23, true,
1572 tcu::MAYBE, // subnormals
1573 tcu::YES, // infinities
1574 tcu::MAYBE); // NaN
1575 const FloatFormat coordFormat(-32, 32, 16, true);
1576 const ChannelAccess &rAccess(swizzle(componentMapping.r, rPlane, rPlane, gPlane, bPlane, aPlane));
1577 const ChannelAccess &gAccess(swizzle(componentMapping.g, gPlane, rPlane, gPlane, bPlane, aPlane));
1578 const ChannelAccess &bAccess(swizzle(componentMapping.b, bPlane, rPlane, gPlane, bPlane, aPlane));
1579 const ChannelAccess &aAccess(swizzle(componentMapping.a, aPlane, rPlane, gPlane, bPlane, aPlane));
1580
1581 const bool subsampledX = gAccess.getSize().x() > rAccess.getSize().x();
1582 const bool subsampledY = gAccess.getSize().y() > rAccess.getSize().y();
1583
1584 minBounds.resize(sts.size(), Vec4(TCU_INFINITY));
1585 maxBounds.resize(sts.size(), Vec4(-TCU_INFINITY));
1586
1587 uvBounds.resize(sts.size(), Vec4(TCU_INFINITY, -TCU_INFINITY, TCU_INFINITY, -TCU_INFINITY));
1588 ijBounds.resize(sts.size(), IVec4(0x7FFFFFFF, -1 - 0x7FFFFFFF, 0x7FFFFFFF, -1 - 0x7FFFFFFF));
1589
1590 // Chroma plane sizes must match
1591 DE_ASSERT(rAccess.getSize() == bAccess.getSize());
1592
1593 // Luma plane sizes must match
1594 DE_ASSERT(gAccess.getSize() == aAccess.getSize());
1595
1596 // Luma plane size must match chroma plane or be twice as big
1597 DE_ASSERT(rAccess.getSize().x() == gAccess.getSize().x() || 2 * rAccess.getSize().x() == gAccess.getSize().x());
1598 DE_ASSERT(rAccess.getSize().y() == gAccess.getSize().y() || 2 * rAccess.getSize().y() == gAccess.getSize().y());
1599
1600 DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1601 DE_ASSERT(chromaFilter == vk::VK_FILTER_NEAREST || chromaFilter == vk::VK_FILTER_LINEAR);
1602 DE_ASSERT(subsampledX || !subsampledY);
1603
1604 for (size_t ndx = 0; ndx < sts.size(); ndx++)
1605 {
1606 const Vec2 st(sts[ndx]);
1607 Interval bounds[4];
1608
1609 const Interval u(calculateUV(coordFormat, st[0], gAccess.getSize().x()));
1610 const Interval v(calculateUV(coordFormat, st[1], gAccess.getSize().y()));
1611
1612 uvBounds[ndx][0] = (float)u.lo();
1613 uvBounds[ndx][1] = (float)u.hi();
1614
1615 uvBounds[ndx][2] = (float)v.lo();
1616 uvBounds[ndx][3] = (float)v.hi();
1617
1618 const IVec2 iRange(calculateIJRange(filter, coordFormat, u));
1619 const IVec2 jRange(calculateIJRange(filter, coordFormat, v));
1620
1621 ijBounds[ndx][0] = iRange[0];
1622 ijBounds[ndx][1] = iRange[1];
1623
1624 ijBounds[ndx][2] = jRange[0];
1625 ijBounds[ndx][3] = jRange[1];
1626
1627 for (int j = jRange.x(); j <= jRange.y(); j++)
1628 for (int i = iRange.x(); i <= iRange.y(); i++)
1629 {
1630 if (filter == vk::VK_FILTER_NEAREST)
1631 {
1632 const Interval gValue(
1633 lookupWrapped(gAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(i, j)));
1634 const Interval aValue(
1635 lookupWrapped(aAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(i, j)));
1636
1637 if (explicitReconstruction || !(subsampledX || subsampledY))
1638 {
1639 Interval rValue, bValue;
1640 if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1641 {
1642 // Reconstruct using nearest if needed, otherwise, just take what's already there.
1643 const int subI = subsampledX ? i / 2 : i;
1644 const int subJ = subsampledY ? j / 2 : j;
1645 rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1646 IVec2(subI, subJ));
1647 bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1648 IVec2(subI, subJ));
1649 }
1650 else // vk::VK_FILTER_LINEAR
1651 {
1652 if (subsampledY)
1653 {
1654 rValue = reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1655 xChromaOffset, yChromaOffset, addressModeU,
1656 addressModeV, rAccess, i, j);
1657 bValue = reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1658 xChromaOffset, yChromaOffset, addressModeU,
1659 addressModeV, bAccess, i, j);
1660 }
1661 else
1662 {
1663 rValue = reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1664 xChromaOffset, addressModeU, addressModeV,
1665 rAccess, i, j);
1666 bValue = reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1667 xChromaOffset, addressModeU, addressModeV,
1668 bAccess, i, j);
1669 }
1670 }
1671
1672 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1673 Interval dstColor[4];
1674
1675 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1676
1677 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1678 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1679 }
1680 else
1681 {
1682 const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1683 u);
1684 const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1685 v);
1686
1687 // Reconstructed chroma samples with implicit filtering
1688 const IVec2 chromaIRange(subsampledX ? calculateIJRange(chromaFilter, coordFormat, chromaU) :
1689 IVec2(i, i));
1690 const IVec2 chromaJRange(subsampledY ? calculateIJRange(chromaFilter, coordFormat, chromaV) :
1691 IVec2(j, j));
1692
1693 for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1694 for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1695 {
1696 Interval rValue, bValue;
1697
1698 if (chromaFilter == vk::VK_FILTER_NEAREST)
1699 {
1700 rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1701 IVec2(chromaI, chromaJ));
1702 bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1703 IVec2(chromaI, chromaJ));
1704 }
1705 else // vk::VK_FILTER_LINEAR
1706 {
1707 const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1708 const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1709
1710 rValue =
1711 linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1712 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1713 bValue =
1714 linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1715 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1716 }
1717
1718 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1719
1720 Interval dstColor[4];
1721 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1722
1723 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1724 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1725 }
1726 }
1727 }
1728 else // filter == vk::VK_FILTER_LINEAR
1729 {
1730 const Interval lumaA(calculateAB(subTexelPrecisionBits, u, i));
1731 const Interval lumaB(calculateAB(subTexelPrecisionBits, v, j));
1732
1733 const Interval gValue(linearSample(gAccess, conversionFormat[1], filteringFormat[1], addressModeU,
1734 addressModeV, IVec2(i, j), lumaA, lumaB));
1735 const Interval aValue(linearSample(aAccess, conversionFormat[3], filteringFormat[3], addressModeU,
1736 addressModeV, IVec2(i, j), lumaA, lumaB));
1737
1738 if (explicitReconstruction || !(subsampledX || subsampledY))
1739 {
1740 Interval rValue, bValue;
1741 if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1742 {
1743 rValue = linearInterpolate(
1744 filteringFormat[0], lumaA, lumaB,
1745 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1746 IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1747 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1748 IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1749 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1750 IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1751 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1752 IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1753 bValue = linearInterpolate(
1754 filteringFormat[2], lumaA, lumaB,
1755 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1756 IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1757 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1758 IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1759 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1760 IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1761 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1762 IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1763 }
1764 else // vk::VK_FILTER_LINEAR
1765 {
1766 if (subsampledY)
1767 {
1768 // Linear, Reconstructed xx chroma samples with explicit linear filtering
1769 rValue = linearInterpolate(
1770 filteringFormat[0], lumaA, lumaB,
1771 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1772 xChromaOffset, yChromaOffset, addressModeU,
1773 addressModeV, rAccess, i, j),
1774 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1775 xChromaOffset, yChromaOffset, addressModeU,
1776 addressModeV, rAccess, i + 1, j),
1777 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1778 xChromaOffset, yChromaOffset, addressModeU,
1779 addressModeV, rAccess, i, j + 1),
1780 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1781 xChromaOffset, yChromaOffset, addressModeU,
1782 addressModeV, rAccess, i + 1, j + 1));
1783 bValue = linearInterpolate(
1784 filteringFormat[2], lumaA, lumaB,
1785 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1786 xChromaOffset, yChromaOffset, addressModeU,
1787 addressModeV, bAccess, i, j),
1788 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1789 xChromaOffset, yChromaOffset, addressModeU,
1790 addressModeV, bAccess, i + 1, j),
1791 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1792 xChromaOffset, yChromaOffset, addressModeU,
1793 addressModeV, bAccess, i, j + 1),
1794 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1795 xChromaOffset, yChromaOffset, addressModeU,
1796 addressModeV, bAccess, i + 1, j + 1));
1797 }
1798 else
1799 {
1800 // Linear, Reconstructed x chroma samples with explicit linear filtering
1801 rValue = linearInterpolate(
1802 filteringFormat[0], lumaA, lumaB,
1803 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1804 xChromaOffset, addressModeU, addressModeV, rAccess,
1805 i, j),
1806 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1807 xChromaOffset, addressModeU, addressModeV, rAccess,
1808 i + 1, j),
1809 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1810 xChromaOffset, addressModeU, addressModeV, rAccess,
1811 i, j + 1),
1812 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1813 xChromaOffset, addressModeU, addressModeV, rAccess,
1814 i + 1, j + 1));
1815 bValue = linearInterpolate(
1816 filteringFormat[2], lumaA, lumaB,
1817 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1818 xChromaOffset, addressModeU, addressModeV, bAccess,
1819 i, j),
1820 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1821 xChromaOffset, addressModeU, addressModeV, bAccess,
1822 i + 1, j),
1823 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1824 xChromaOffset, addressModeU, addressModeV, bAccess,
1825 i, j + 1),
1826 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1827 xChromaOffset, addressModeU, addressModeV, bAccess,
1828 i + 1, j + 1));
1829 }
1830 }
1831
1832 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1833 Interval dstColor[4];
1834
1835 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1836
1837 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1838 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1839 }
1840 else
1841 {
1842 const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1843 u);
1844 const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1845 v);
1846
1847 // TODO: It looks incorrect to ignore the chroma filter here. Is it?
1848 const IVec2 chromaIRange(calculateNearestIJRange(coordFormat, chromaU));
1849 const IVec2 chromaJRange(calculateNearestIJRange(coordFormat, chromaV));
1850
1851 for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1852 for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1853 {
1854 Interval rValue, bValue;
1855
1856 if (chromaFilter == vk::VK_FILTER_NEAREST)
1857 {
1858 rValue = lookupWrapped(rAccess, conversionFormat[1], addressModeU, addressModeV,
1859 IVec2(chromaI, chromaJ));
1860 bValue = lookupWrapped(bAccess, conversionFormat[3], addressModeU, addressModeV,
1861 IVec2(chromaI, chromaJ));
1862 }
1863 else // vk::VK_FILTER_LINEAR
1864 {
1865 const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1866 const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1867
1868 rValue =
1869 linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1870 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1871 bValue =
1872 linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1873 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1874 }
1875
1876 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1877 Interval dstColor[4];
1878 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1879
1880 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1881 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1882 }
1883 }
1884 }
1885 }
1886
1887 minBounds[ndx] =
1888 Vec4((float)bounds[0].lo(), (float)bounds[1].lo(), (float)bounds[2].lo(), (float)bounds[3].lo());
1889 maxBounds[ndx] =
1890 Vec4((float)bounds[0].hi(), (float)bounds[1].hi(), (float)bounds[2].hi(), (float)bounds[3].hi());
1891 }
1892 }
1893
1894 } // namespace ycbcr
1895
1896 } // namespace vkt
1897