xref: /aosp_15_r20/external/deqp/framework/common/tcuAstcUtil.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Tester Core
3  * ----------------------------------------
4  *
5  * Copyright 2016 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief ASTC Utilities.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "tcuAstcUtil.hpp"
25 #include "deFloat16.h"
26 #include "deRandom.hpp"
27 #include "deMeta.hpp"
28 
29 #include <algorithm>
30 
31 namespace tcu
32 {
33 namespace astc
34 {
35 
36 using std::vector;
37 
38 namespace
39 {
40 
41 // Common utilities
42 
// Upper bounds for block dimensions, in texels.
// \note The users of these constants are outside this part of the file.
enum
{
    MAX_BLOCK_WIDTH  = 12,
    MAX_BLOCK_HEIGHT = 12
};
48 
getBit(uint32_t src,int ndx)49 inline uint32_t getBit(uint32_t src, int ndx)
50 {
51     DE_ASSERT(de::inBounds(ndx, 0, 32));
52     return (src >> ndx) & 1;
53 }
54 
getBits(uint32_t src,int low,int high)55 inline uint32_t getBits(uint32_t src, int low, int high)
56 {
57     const int numBits = (high - low) + 1;
58 
59     DE_ASSERT(de::inRange(numBits, 1, 32));
60 
61     if (numBits < 32)
62         return (uint32_t)((src >> low) & ((1u << numBits) - 1));
63     else
64         return (uint32_t)((src >> low) & 0xFFFFFFFFu);
65 }
66 
isBitSet(uint32_t src,int ndx)67 inline bool isBitSet(uint32_t src, int ndx)
68 {
69     return getBit(src, ndx) != 0;
70 }
71 
reverseBits(uint32_t src,int numBits)72 inline uint32_t reverseBits(uint32_t src, int numBits)
73 {
74     DE_ASSERT(de::inRange(numBits, 0, 32));
75     uint32_t result = 0;
76     for (int i = 0; i < numBits; i++)
77         result |= ((src >> i) & 1) << (numBits - 1 - i);
78     return result;
79 }
80 
bitReplicationScale(uint32_t src,int numSrcBits,int numDstBits)81 inline uint32_t bitReplicationScale(uint32_t src, int numSrcBits, int numDstBits)
82 {
83     DE_ASSERT(numSrcBits <= numDstBits);
84     DE_ASSERT((src & ((1 << numSrcBits) - 1)) == src);
85     uint32_t dst = 0;
86     for (int shift = numDstBits - numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
87         dst |= shift >= 0 ? src << shift : src >> -shift;
88     return dst;
89 }
90 
signExtend(int32_t src,int numSrcBits)91 inline int32_t signExtend(int32_t src, int numSrcBits)
92 {
93     DE_ASSERT(de::inRange(numSrcBits, 2, 31));
94     const bool negative = (src & (1 << (numSrcBits - 1))) != 0;
95     return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
96 }
97 
isFloat16InfOrNan(deFloat16 v)98 inline bool isFloat16InfOrNan(deFloat16 v)
99 {
100     return getBits(v, 10, 14) == 31;
101 }
102 
// Integer sequence encoding (ISE) variants. See computeNumRequiredBits() for the
// storage cost of each.
enum ISEMode
{
    ISEMODE_TRIT = 0,  //!< Each value has a trit plus plain bits; trits are packed 5 values per 8 bits.
    ISEMODE_QUINT,     //!< Each value has a quint plus plain bits; quints are packed 3 values per 7 bits.
    ISEMODE_PLAIN_BIT, //!< Each value is stored as plain bits only.

    ISEMODE_LAST
};
111 
// Parameters of one integer sequence encoding: the variant and the number of
// plain bits stored per value.
struct ISEParams
{
    ISEMode mode; //!< Encoding variant.
    int numBits;  //!< Plain bits per value (in trit/quint modes, in addition to the trit/quint).

    ISEParams(ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_)
    {
    }
};
121 
computeNumRequiredBits(const ISEParams & iseParams,int numValues)122 inline int computeNumRequiredBits(const ISEParams &iseParams, int numValues)
123 {
124     switch (iseParams.mode)
125     {
126     case ISEMODE_TRIT:
127         return deDivRoundUp32(numValues * 8, 5) + numValues * iseParams.numBits;
128     case ISEMODE_QUINT:
129         return deDivRoundUp32(numValues * 7, 3) + numValues * iseParams.numBits;
130     case ISEMODE_PLAIN_BIT:
131         return numValues * iseParams.numBits;
132     default:
133         DE_ASSERT(false);
134         return -1;
135     }
136 }
137 
computeMaximumRangeISEParams(int numAvailableBits,int numValuesInSequence)138 ISEParams computeMaximumRangeISEParams(int numAvailableBits, int numValuesInSequence)
139 {
140     int curBitsForTritMode     = 6;
141     int curBitsForQuintMode    = 5;
142     int curBitsForPlainBitMode = 8;
143 
144     while (true)
145     {
146         DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
147 
148         const int tritRange     = curBitsForTritMode > 0 ? (3 << curBitsForTritMode) - 1 : -1;
149         const int quintRange    = curBitsForQuintMode > 0 ? (5 << curBitsForQuintMode) - 1 : -1;
150         const int plainBitRange = curBitsForPlainBitMode > 0 ? (1 << curBitsForPlainBitMode) - 1 : -1;
151         const int maxRange      = de::max(de::max(tritRange, quintRange), plainBitRange);
152 
153         if (maxRange == tritRange)
154         {
155             const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
156             if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
157                 return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
158             curBitsForTritMode--;
159         }
160         else if (maxRange == quintRange)
161         {
162             const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
163             if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
164                 return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
165             curBitsForQuintMode--;
166         }
167         else
168         {
169             const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
170             DE_ASSERT(maxRange == plainBitRange);
171             if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
172                 return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
173             curBitsForPlainBitMode--;
174         }
175     }
176 }
177 
computeNumColorEndpointValues(uint32_t endpointMode)178 inline int computeNumColorEndpointValues(uint32_t endpointMode)
179 {
180     DE_ASSERT(endpointMode < 16);
181     return (endpointMode / 4 + 1) * 2;
182 }
183 
184 // Decompression utilities
185 
// Outcome of decompressing a single block.
enum DecompressResult
{
    DECOMPRESS_RESULT_VALID_BLOCK = 0, //!< Decompressed valid block
    DECOMPRESS_RESULT_ERROR,           //!< Encountered error while decompressing, error color written

    DECOMPRESS_RESULT_LAST
};
193 
// A helper for getting bits from a 128-bit block.
//
// The block is held as NUM_WORDS 64-bit words. Bit index 0 is the least significant
// bit of the first source byte; bit index 127 is the most significant bit of the
// last source byte.
class Block128
{
private:
    typedef uint64_t Word;

    enum
    {
        WORD_BYTES = sizeof(Word),
        WORD_BITS  = 8 * WORD_BYTES,
        NUM_WORDS  = 128 / WORD_BITS
    };

    DE_STATIC_ASSERT(128 % WORD_BITS == 0);

public:
    // Reads NUM_WORDS * WORD_BYTES (= 16) bytes from src. Within each word the
    // byte with the lowest address ends up in the lowest 8 bits.
    Block128(const uint8_t *src)
    {
        for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
        {
            m_words[wordNdx] = 0;
            for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
                m_words[wordNdx] |= (Word)src[wordNdx * WORD_BYTES + byteNdx] << (8 * byteNdx);
        }
    }

    // Returns bit number ndx of the block as 0 or 1.
    // \note ndx must pass de::inBounds(ndx, 0, 128); presumably that means 0 <= ndx < 128.
    uint32_t getBit(int ndx) const
    {
        DE_ASSERT(de::inBounds(ndx, 0, 128));
        return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
    }

    // Returns the bit field [low, high] (both ends inclusive), with bit 'low' in
    // bit 0 of the result. The field may be up to 32 bits wide and may straddle
    // the boundary between the two words. An empty field (high == low - 1) yields 0.
    uint32_t getBits(int low, int high) const
    {
        DE_ASSERT(de::inBounds(low, 0, 128));
        DE_ASSERT(de::inBounds(high, 0, 128));
        DE_ASSERT(de::inRange(high - low + 1, 0, 32));

        if (high - low + 1 == 0)
            return 0;

        const int word0Ndx = low / WORD_BITS;
        const int word1Ndx = high / WORD_BITS;

        // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.

        if (word0Ndx == word1Ndx)
            // Whole field inside one word: drop everything above 'high', then shift 'low' down to bit 0.
            return (uint32_t)((m_words[word0Ndx] & ((((Word)1 << high % WORD_BITS << 1) - 1))) >>
                              ((Word)low % WORD_BITS));
        else
        {
            DE_ASSERT(word1Ndx == word0Ndx + 1);

            // Low part: the top bits of the first word. High part: the bits of the second word
            // up to 'high', shifted left by the number of bits that came from the first word
            // (high - low - high % WORD_BITS == WORD_BITS - low % WORD_BITS).
            return (uint32_t)(m_words[word0Ndx] >> (low % WORD_BITS)) |
                   (uint32_t)((m_words[word1Ndx] & (((Word)1 << high % WORD_BITS << 1) - 1))
                              << (high - low - high % WORD_BITS));
        }
    }

    // Boolean form of getBit(): true when bit number ndx of the block is 1.
    bool isBitSet(int ndx) const
    {
        DE_ASSERT(de::inBounds(ndx, 0, 128));
        return getBit(ndx) != 0;
    }

private:
    Word m_words[NUM_WORDS]; //!< Block contents; m_words[0] holds bits 0..63.
};
262 
263 // A helper for sequential access into a Block128.
264 class BitAccessStream
265 {
266 public:
BitAccessStream(const Block128 & src,int startNdxInSrc,int length,bool forward)267     BitAccessStream(const Block128 &src, int startNdxInSrc, int length, bool forward)
268         : m_src(src)
269         , m_startNdxInSrc(startNdxInSrc)
270         , m_length(length)
271         , m_forward(forward)
272         , m_ndx(0)
273     {
274     }
275 
276     // Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
getNext(int num)277     uint32_t getNext(int num)
278     {
279         if (num == 0 || m_ndx >= m_length)
280             return 0;
281 
282         const int end            = m_ndx + num;
283         const int numBitsFromSrc = de::max(0, de::min(m_length, end) - m_ndx);
284         const int low            = m_ndx;
285         const int high           = m_ndx + numBitsFromSrc - 1;
286 
287         m_ndx += num;
288 
289         return m_forward ? m_src.getBits(m_startNdxInSrc + low, m_startNdxInSrc + high) :
290                            reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc);
291     }
292 
293 private:
294     const Block128 &m_src;
295     const int m_startNdxInSrc;
296     const int m_length;
297     const bool m_forward;
298 
299     int m_ndx;
300 };
301 
// One value decoded from an integer sequence.
struct ISEDecodedResult
{
    uint32_t m;  //!< Plain bits of the value, as read from the stream.
    uint32_t tq; //!< Trit or quint value, depending on ISE mode.
    uint32_t v;  //!< Combined value: (tq << numBits) + m in trit/quint modes, m in plain bit mode.
};
308 
// Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
// Filled in by getASTCBlockMode().
struct ASTCBlockMode
{
    bool isError;
    // \note Following fields only relevant if !isError.
    bool isVoidExtent;
    // \note Following fields only relevant if !isVoidExtent.
    bool isDualPlane;
    int weightGridWidth;
    int weightGridHeight;
    ISEParams weightISEParams;

    // Starts out flagged as an error, with placeholder values (-1, ISEMODE_LAST)
    // in the grid and ISE fields.
    ASTCBlockMode(void)
        : isError(true)
        , isVoidExtent(true)
        , isDualPlane(true)
        , weightGridWidth(-1)
        , weightGridHeight(-1)
        , weightISEParams(ISEMODE_LAST, -1)
    {
    }
};
331 
computeNumWeights(const ASTCBlockMode & mode)332 inline int computeNumWeights(const ASTCBlockMode &mode)
333 {
334     return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
335 }
336 
// A pair of color endpoints, each held as a four-component unsigned vector.
// \note The code that fills and reads these is outside this part of the file.
struct ColorEndpointPair
{
    UVec4 e0;
    UVec4 e1;
};
342 
// Two weights for one texel.
// \note Presumably one weight per plane; the users are outside this part of the file.
struct TexelWeightPair
{
    uint32_t w[2];
};
347 
// Decodes the block mode bits of an ASTC block.
//
// \param blockModeData The block mode field; bits [0,10] are examined.
// \return Decoded mode. isError stays true for the reserved encodings. For a void-extent
//         block (bits [0,8] == 0x1fc) only isError and isVoidExtent are meaningful.
//         Otherwise the weight grid size, the dual plane flag and the weight ISE
//         parameters are filled in.
ASTCBlockMode getASTCBlockMode(uint32_t blockModeData)
{
    ASTCBlockMode blockMode;
    blockMode.isError = true; // \note Set to false later, if not error.

    blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc;

    if (!blockMode.isVoidExtent)
    {
        if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) ||
            getBits(blockModeData, 0, 3) == 0)
            return blockMode; // Invalid ("reserved").

        uint32_t r = (uint32_t)-1; // \note Set in the following branches.

        // The layout of the remaining fields depends on whether bits [0,1] are zero.
        if (getBits(blockModeData, 0, 1) == 0)
        {
            // r is assembled from bits 4 (lowest), 2 and 3 (highest).
            const uint32_t r0  = getBit(blockModeData, 4);
            const uint32_t r1  = getBit(blockModeData, 2);
            const uint32_t r2  = getBit(blockModeData, 3);
            const uint32_t i78 = getBits(blockModeData, 7, 8);

            r = (r2 << 2) | (r1 << 1) | (r0 << 0);

            if (i78 == 3)
            {
                // Fixed 6x10 or 10x6 grid, selected by bit 5.
                const bool i5              = isBitSet(blockModeData, 5);
                blockMode.weightGridWidth  = i5 ? 10 : 6;
                blockMode.weightGridHeight = i5 ? 6 : 10;
            }
            else
            {
                const uint32_t a = getBits(blockModeData, 5, 6);
                switch (i78)
                {
                case 0:
                    blockMode.weightGridWidth  = 12;
                    blockMode.weightGridHeight = a + 2;
                    break;
                case 1:
                    blockMode.weightGridWidth  = a + 2;
                    blockMode.weightGridHeight = 12;
                    break;
                case 2:
                    // \note This layout takes the height from bits [9,10]; see zeroDH below.
                    blockMode.weightGridWidth  = a + 6;
                    blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;
                    break;
                default:
                    DE_ASSERT(false);
                }
            }
        }
        else
        {
            // r is assembled from bits 4 (lowest), 0 and 1 (highest).
            const uint32_t r0  = getBit(blockModeData, 4);
            const uint32_t r1  = getBit(blockModeData, 0);
            const uint32_t r2  = getBit(blockModeData, 1);
            const uint32_t i23 = getBits(blockModeData, 2, 3);
            const uint32_t a   = getBits(blockModeData, 5, 6);

            r = (r2 << 2) | (r1 << 1) | (r0 << 0);

            if (i23 == 3)
            {
                // Here b is a single bit (bit 7); bit 8 selects between the two layouts.
                const uint32_t b           = getBit(blockModeData, 7);
                const bool i8              = isBitSet(blockModeData, 8);
                blockMode.weightGridWidth  = i8 ? b + 2 : a + 2;
                blockMode.weightGridHeight = i8 ? a + 2 : b + 6;
            }
            else
            {
                const uint32_t b = getBits(blockModeData, 7, 8);

                switch (i23)
                {
                case 0:
                    blockMode.weightGridWidth  = b + 4;
                    blockMode.weightGridHeight = a + 2;
                    break;
                case 1:
                    blockMode.weightGridWidth  = b + 8;
                    blockMode.weightGridHeight = a + 2;
                    break;
                case 2:
                    blockMode.weightGridWidth  = a + 2;
                    blockMode.weightGridHeight = b + 8;
                    break;
                default:
                    DE_ASSERT(false);
                }
            }
        }

        // In the one layout that uses bits [9,10] for the grid height, both h and the
        // dual plane flag are forced to zero; otherwise they are bits 9 and 10.
        const bool zeroDH     = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2;
        const bool h          = zeroDH ? 0 : isBitSet(blockModeData, 9);
        blockMode.isDualPlane = zeroDH ? 0 : isBitSet(blockModeData, 10);

        // Map (h, r) to the weight ISE parameters. Cases that leave m or b untouched
        // keep the defaults set just below (plain bit mode, zero bits).
        {
            ISEMode &m = blockMode.weightISEParams.mode;
            int &b     = blockMode.weightISEParams.numBits;
            m          = ISEMODE_PLAIN_BIT;
            b          = 0;

            if (h)
            {
                switch (r)
                {
                case 2:
                    m = ISEMODE_QUINT;
                    b = 1;
                    break;
                case 3:
                    m = ISEMODE_TRIT;
                    b = 2;
                    break;
                case 4:
                    b = 4;
                    break;
                case 5:
                    m = ISEMODE_QUINT;
                    b = 2;
                    break;
                case 6:
                    m = ISEMODE_TRIT;
                    b = 3;
                    break;
                case 7:
                    b = 5;
                    break;
                default:
                    DE_ASSERT(false);
                }
            }
            else
            {
                switch (r)
                {
                case 2:
                    b = 1;
                    break;
                case 3:
                    m = ISEMODE_TRIT;
                    break;
                case 4:
                    b = 2;
                    break;
                case 5:
                    m = ISEMODE_QUINT;
                    break;
                case 6:
                    m = ISEMODE_TRIT;
                    b = 1;
                    break;
                case 7:
                    b = 3;
                    break;
                default:
                    DE_ASSERT(false);
                }
            }
        }
    }

    blockMode.isError = false;
    return blockMode;
}
514 
// Fills a block of blockWidth * blockHeight RGBA texels with the error color
// (opaque magenta).
//
// \param dst         Destination texels, four components each: uint8_t components
//                    when isSRGB is true, float components otherwise.
// \param blockWidth  Block width in texels.
// \param blockHeight Block height in texels.
// \param isSRGB      Selects the destination component type (see dst).
inline void setASTCErrorColorBlock(void *dst, int blockWidth, int blockHeight, bool isSRGB)
{
    const int numTexels = blockWidth * blockHeight;

    if (isSRGB)
    {
        static const uint8_t errorColor[4] = {0xff, 0, 0xff, 0xff};
        uint8_t *texel                     = (uint8_t *)dst;

        for (int texelNdx = 0; texelNdx < numTexels; texelNdx++, texel += 4)
            for (int c = 0; c < 4; c++)
                texel[c] = errorColor[c];
    }
    else
    {
        static const float errorColor[4] = {1.0f, 0.0f, 1.0f, 1.0f};
        float *texel                     = (float *)dst;

        for (int texelNdx = 0; texelNdx < numTexels; texelNdx++, texel += 4)
            for (int c = 0; c < 4; c++)
                texel[c] = errorColor[c];
    }
}
542 
decodeVoidExtentBlock(void * dst,const Block128 & blockData,int blockWidth,int blockHeight,bool isSRGB,bool isLDRMode)543 DecompressResult decodeVoidExtentBlock(void *dst, const Block128 &blockData, int blockWidth, int blockHeight,
544                                        bool isSRGB, bool isLDRMode)
545 {
546     const uint32_t minSExtent = blockData.getBits(12, 24);
547     const uint32_t maxSExtent = blockData.getBits(25, 37);
548     const uint32_t minTExtent = blockData.getBits(38, 50);
549     const uint32_t maxTExtent = blockData.getBits(51, 63);
550     const bool allExtentsAllOnes =
551         minSExtent == 0x1fff && maxSExtent == 0x1fff && minTExtent == 0x1fff && maxTExtent == 0x1fff;
552     const bool isHDRBlock = blockData.isBitSet(9);
553 
554     if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent)))
555     {
556         setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
557         return DECOMPRESS_RESULT_ERROR;
558     }
559 
560     const uint32_t rgba[4] = {blockData.getBits(64, 79), blockData.getBits(80, 95), blockData.getBits(96, 111),
561                               blockData.getBits(112, 127)};
562 
563     if (isSRGB)
564     {
565         uint8_t *const dstU = (uint8_t *)dst;
566         for (int i = 0; i < blockWidth * blockHeight; i++)
567             for (int c = 0; c < 4; c++)
568                 dstU[i * 4 + c] = (uint8_t)((rgba[c] & 0xff00) >> 8);
569     }
570     else
571     {
572         float *const dstF = (float *)dst;
573 
574         if (isHDRBlock)
575         {
576             for (int c = 0; c < 4; c++)
577             {
578                 if (isFloat16InfOrNan((deFloat16)rgba[c]))
579                     throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture "
580                                         "(behavior undefined by ASTC specification)");
581             }
582 
583             for (int i = 0; i < blockWidth * blockHeight; i++)
584                 for (int c = 0; c < 4; c++)
585                     dstF[i * 4 + c] = deFloat16To32((deFloat16)rgba[c]);
586         }
587         else
588         {
589             for (int i = 0; i < blockWidth * blockHeight; i++)
590                 for (int c = 0; c < 4; c++)
591                     dstF[i * 4 + c] = rgba[c] == 65535 ? 1.0f : (float)rgba[c] / 65536.0f;
592         }
593     }
594 
595     return DECOMPRESS_RESULT_VALID_BLOCK;
596 }
597 
decodeColorEndpointModes(uint32_t * endpointModesDst,const Block128 & blockData,int numPartitions,int extraCemBitsStart)598 void decodeColorEndpointModes(uint32_t *endpointModesDst, const Block128 &blockData, int numPartitions,
599                               int extraCemBitsStart)
600 {
601     if (numPartitions == 1)
602         endpointModesDst[0] = blockData.getBits(13, 16);
603     else
604     {
605         const uint32_t highLevelSelector = blockData.getBits(23, 24);
606 
607         if (highLevelSelector == 0)
608         {
609             const uint32_t mode = blockData.getBits(25, 28);
610             for (int i = 0; i < numPartitions; i++)
611                 endpointModesDst[i] = mode;
612         }
613         else
614         {
615             for (int partNdx = 0; partNdx < numPartitions; partNdx++)
616             {
617                 const uint32_t cemClass   = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1);
618                 const uint32_t lowBit0Ndx = numPartitions + 2 * partNdx;
619                 const uint32_t lowBit1Ndx = numPartitions + 2 * partNdx + 1;
620                 const uint32_t lowBit0 =
621                     blockData.getBit(lowBit0Ndx < 4 ? 25 + lowBit0Ndx : extraCemBitsStart + lowBit0Ndx - 4);
622                 const uint32_t lowBit1 =
623                     blockData.getBit(lowBit1Ndx < 4 ? 25 + lowBit1Ndx : extraCemBitsStart + lowBit1Ndx - 4);
624 
625                 endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
626             }
627         }
628     }
629 }
630 
computeNumColorEndpointValues(const uint32_t * endpointModes,int numPartitions)631 int computeNumColorEndpointValues(const uint32_t *endpointModes, int numPartitions)
632 {
633     int result = 0;
634     for (int i = 0; i < numPartitions; i++)
635         result += computeNumColorEndpointValues(endpointModes[i]);
636     return result;
637 }
638 
// Decodes one trit block of an integer sequence: up to five values that share
// one packed 8-bit trit field T.
//
// \param dst       Receives numValues decoded values.
// \param numValues Number of values in this block, in range [1, 5].
// \param data      Bit stream positioned at the start of the block.
// \param numBits   Number of plain bits per value.
//
// \note The stream is always read as if the block held five values; the pieces of T
//       that belong to values beyond numValues are then forced to zero.
void decodeISETritBlock(ISEDecodedResult *dst, int numValues, BitAccessStream &data, int numBits)
{
    DE_ASSERT(de::inRange(numValues, 1, 5));

    uint32_t m[5];

    // The plain bits of each value are interleaved with pieces of T:
    // m0, T[0..1], m1, T[2..3], m2, T[4], m3, T[5..6], m4, T[7].
    m[0]         = data.getNext(numBits);
    uint32_t T01 = data.getNext(2);
    m[1]         = data.getNext(numBits);
    uint32_t T23 = data.getNext(2);
    m[2]         = data.getNext(numBits);
    uint32_t T4  = data.getNext(1);
    m[3]         = data.getNext(numBits);
    uint32_t T56 = data.getNext(2);
    m[4]         = data.getNext(numBits);
    uint32_t T7  = data.getNext(1);

    // Each case clears one more piece of T and then falls through to the next.
    switch (numValues)
    {
    case 1:
        T23 = 0;
    // Fallthrough
    case 2:
        T4 = 0;
    // Fallthrough
    case 3:
        T56 = 0;
    // Fallthrough
    case 4:
        T7 = 0;
    // Fallthrough
    case 5:
        break;
    default:
        DE_ASSERT(false);
    }

    const uint32_t T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);

    // tritsFromT[T][i] is the trit of value i for the packed field T.
    static const uint32_t tritsFromT[256][5] = {
        {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0}, {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0},
        {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0}, {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
        {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0}, {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0},
        {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0}, {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
        {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0}, {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2},
        {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2}, {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
        {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0}, {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0},
        {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0}, {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
        {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0}, {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0},
        {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0}, {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
        {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2}, {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0},
        {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0}, {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
        {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0}, {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0},
        {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0}, {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
        {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0}, {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0},
        {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0}, {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
        {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2}, {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2},
        {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2}, {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
        {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2}, {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2},
        {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2}, {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
        {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2}, {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2},
        {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2}, {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
        {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1}, {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1},
        {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1}, {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
        {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1}, {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1},
        {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1}, {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
        {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2}, {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1},
        {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1}, {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
        {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1}, {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1},
        {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1}, {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
        {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1}, {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1},
        {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1}, {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
        {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1}, {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1},
        {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1}, {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
        {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1}, {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1},
        {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1}, {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
        {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1}, {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2},
        {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2}, {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
        {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2}, {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2},
        {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2}, {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
        {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2}, {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2},
        {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2}, {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
        {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2}};

    const uint32_t(&trits)[5] = tritsFromT[T];

    // The trit sits above the numBits plain bits of each value.
    for (int i = 0; i < numValues; i++)
    {
        dst[i].m  = m[i];
        dst[i].tq = trits[i];
        dst[i].v  = (trits[i] << numBits) + m[i];
    }
}
732 
decodeISEQuintBlock(ISEDecodedResult * dst,int numValues,BitAccessStream & data,int numBits)733 void decodeISEQuintBlock(ISEDecodedResult *dst, int numValues, BitAccessStream &data, int numBits)
734 {
735     DE_ASSERT(de::inRange(numValues, 1, 3));
736 
737     uint32_t m[3];
738 
739     m[0]          = data.getNext(numBits);
740     uint32_t Q012 = data.getNext(3);
741     m[1]          = data.getNext(numBits);
742     uint32_t Q34  = data.getNext(2);
743     m[2]          = data.getNext(numBits);
744     uint32_t Q56  = data.getNext(2);
745 
746     switch (numValues)
747     {
748     case 1:
749         Q34 = 0;
750     // Fallthrough
751     case 2:
752         Q56 = 0;
753     // Fallthrough
754     case 3:
755         break;
756     default:
757         DE_ASSERT(false);
758     }
759 
760     const uint32_t Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
761 
762     static const uint32_t quintsFromQ[256][3] = {
763         {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4}, {0, 1, 0}, {1, 1, 0},
764         {2, 1, 0}, {3, 1, 0}, {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4}, {0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0},
765         {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4}, {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0},
766         {4, 4, 3}, {4, 4, 4}, {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1}, {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
767         {0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4}, {0, 2, 1}, {1, 2, 1},
768         {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4}, {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
769         {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4}, {0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2},
770         {2, 0, 4}, {3, 0, 4}, {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
771         {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2}, {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4}, {0, 3, 2}, {1, 3, 2},
772         {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4}, {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3},
773         {4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4}, {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3}, {4, 1, 3}, {1, 4, 3},
774         {0, 1, 4}, {1, 1, 4}, {0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
775         {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4}};
776 
777     const uint32_t(&quints)[3] = quintsFromQ[Q];
778 
779     for (int i = 0; i < numValues; i++)
780     {
781         dst[i].m  = m[i];
782         dst[i].tq = quints[i];
783         dst[i].v  = (quints[i] << numBits) + m[i];
784     }
785 }
786 
decodeISEBitBlock(ISEDecodedResult * dst,BitAccessStream & data,int numBits)787 inline void decodeISEBitBlock(ISEDecodedResult *dst, BitAccessStream &data, int numBits)
788 {
789     dst[0].m = data.getNext(numBits);
790     dst[0].v = dst[0].m;
791 }
792 
decodeISE(ISEDecodedResult * dst,int numValues,BitAccessStream & data,const ISEParams & params)793 void decodeISE(ISEDecodedResult *dst, int numValues, BitAccessStream &data, const ISEParams &params)
794 {
795     if (params.mode == ISEMODE_TRIT)
796     {
797         const int numBlocks = deDivRoundUp32(numValues, 5);
798         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
799         {
800             const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 5 * (numBlocks - 1) : 5;
801             decodeISETritBlock(&dst[5 * blockNdx], numValuesInBlock, data, params.numBits);
802         }
803     }
804     else if (params.mode == ISEMODE_QUINT)
805     {
806         const int numBlocks = deDivRoundUp32(numValues, 3);
807         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
808         {
809             const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 3 * (numBlocks - 1) : 3;
810             decodeISEQuintBlock(&dst[3 * blockNdx], numValuesInBlock, data, params.numBits);
811         }
812     }
813     else
814     {
815         DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
816         for (int i = 0; i < numValues; i++)
817             decodeISEBitBlock(&dst[i], data, params.numBits);
818     }
819 }
820 
unquantizeColorEndpoints(uint32_t * dst,const ISEDecodedResult * iseResults,int numEndpoints,const ISEParams & iseParams)821 void unquantizeColorEndpoints(uint32_t *dst, const ISEDecodedResult *iseResults, int numEndpoints,
822                               const ISEParams &iseParams)
823 {
824     if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
825     {
826         const int rangeCase = iseParams.numBits * 2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
827         DE_ASSERT(de::inRange(rangeCase, 0, 10));
828         static const uint32_t Ca[11] = {204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5};
829         const uint32_t C             = Ca[rangeCase];
830 
831         for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
832         {
833             const uint32_t a = getBit(iseResults[endpointNdx].m, 0);
834             const uint32_t b = getBit(iseResults[endpointNdx].m, 1);
835             const uint32_t c = getBit(iseResults[endpointNdx].m, 2);
836             const uint32_t d = getBit(iseResults[endpointNdx].m, 3);
837             const uint32_t e = getBit(iseResults[endpointNdx].m, 4);
838             const uint32_t f = getBit(iseResults[endpointNdx].m, 5);
839 
840             const uint32_t A = a == 0 ? 0 : (1 << 9) - 1;
841             const uint32_t B = rangeCase == 0  ? 0 :
842                                rangeCase == 1  ? 0 :
843                                rangeCase == 2  ? (b << 8) | (b << 4) | (b << 2) | (b << 1) :
844                                rangeCase == 3  ? (b << 8) | (b << 3) | (b << 2) :
845                                rangeCase == 4  ? (c << 8) | (b << 7) | (c << 3) | (b << 2) | (c << 1) | (b << 0) :
846                                rangeCase == 5  ? (c << 8) | (b << 7) | (c << 2) | (b << 1) | (c << 0) :
847                                rangeCase == 6  ? (d << 8) | (c << 7) | (b << 6) | (d << 2) | (c << 1) | (b << 0) :
848                                rangeCase == 7  ? (d << 8) | (c << 7) | (b << 6) | (d << 1) | (c << 0) :
849                                rangeCase == 8  ? (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 1) | (d << 0) :
850                                rangeCase == 9  ? (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 0) :
851                                rangeCase == 10 ? (f << 8) | (e << 7) | (d << 6) | (c << 5) | (b << 4) | (f << 0) :
852                                                  (uint32_t)-1;
853             DE_ASSERT(B != (uint32_t)-1);
854 
855             dst[endpointNdx] = (((iseResults[endpointNdx].tq * C + B) ^ A) >> 2) | (A & 0x80);
856         }
857     }
858     else
859     {
860         DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
861 
862         for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
863             dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
864     }
865 }
866 
bitTransferSigned(int32_t & a,int32_t & b)867 inline void bitTransferSigned(int32_t &a, int32_t &b)
868 {
869     b >>= 1;
870     b |= a & 0x80;
871     a >>= 1;
872     a &= 0x3f;
873     if (isBitSet(a, 5))
874         a -= 0x40;
875 }
876 
clampedRGBA(const IVec4 & rgba)877 inline UVec4 clampedRGBA(const IVec4 &rgba)
878 {
879     return UVec4(de::clamp(rgba.x(), 0, 0xff), de::clamp(rgba.y(), 0, 0xff), de::clamp(rgba.z(), 0, 0xff),
880                  de::clamp(rgba.w(), 0, 0xff));
881 }
882 
blueContract(int r,int g,int b,int a)883 inline IVec4 blueContract(int r, int g, int b, int a)
884 {
885     return IVec4((r + b) >> 1, (g + b) >> 1, b, a);
886 }
887 
// Returns true for the color endpoint modes that are treated as HDR: 2, 3, 7, 11, 14 and 15.
inline bool isColorEndpointModeHDR(uint32_t mode)
{
    switch (mode)
    {
    case 2:
    case 3:
    case 7:
    case 11:
    case 14:
    case 15:
        return true;

    default:
        return false;
    }
}
892 
decodeHDREndpointMode7(UVec4 & e0,UVec4 & e1,uint32_t v0,uint32_t v1,uint32_t v2,uint32_t v3)893 void decodeHDREndpointMode7(UVec4 &e0, UVec4 &e1, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3)
894 {
895     const uint32_t m10     = getBit(v1, 7) | (getBit(v2, 7) << 1);
896     const uint32_t m23     = getBits(v0, 6, 7);
897     const uint32_t majComp = m10 != 3 ? m10 : m23 != 3 ? m23 : 0;
898     const uint32_t mode    = m10 != 3 ? m23 : m23 != 3 ? 4 : 5;
899 
900     int32_t red   = (int32_t)getBits(v0, 0, 5);
901     int32_t green = (int32_t)getBits(v1, 0, 4);
902     int32_t blue  = (int32_t)getBits(v2, 0, 4);
903     int32_t scale = (int32_t)getBits(v3, 0, 4);
904 
905     {
906 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
907 #define ASSIGN_X_BITS(V0, S0, V1, S1, V2, S2, V3, S3, V4, S4, V5, S5, V6, S6) \
908     do                                                                        \
909     {                                                                         \
910         SHOR(V0, S0, x0);                                                     \
911         SHOR(V1, S1, x1);                                                     \
912         SHOR(V2, S2, x2);                                                     \
913         SHOR(V3, S3, x3);                                                     \
914         SHOR(V4, S4, x4);                                                     \
915         SHOR(V5, S5, x5);                                                     \
916         SHOR(V6, S6, x6);                                                     \
917     } while (false)
918 
919         const uint32_t x0 = getBit(v1, 6);
920         const uint32_t x1 = getBit(v1, 5);
921         const uint32_t x2 = getBit(v2, 6);
922         const uint32_t x3 = getBit(v2, 5);
923         const uint32_t x4 = getBit(v3, 7);
924         const uint32_t x5 = getBit(v3, 6);
925         const uint32_t x6 = getBit(v3, 5);
926 
927         int32_t &R = red;
928         int32_t &G = green;
929         int32_t &B = blue;
930         int32_t &S = scale;
931 
932         switch (mode)
933         {
934         case 0:
935             ASSIGN_X_BITS(R, 9, R, 8, R, 7, R, 10, R, 6, S, 6, S, 5);
936             break;
937         case 1:
938             ASSIGN_X_BITS(R, 8, G, 5, R, 7, B, 5, R, 6, R, 10, R, 9);
939             break;
940         case 2:
941             ASSIGN_X_BITS(R, 9, R, 8, R, 7, R, 6, S, 7, S, 6, S, 5);
942             break;
943         case 3:
944             ASSIGN_X_BITS(R, 8, G, 5, R, 7, B, 5, R, 6, S, 6, S, 5);
945             break;
946         case 4:
947             ASSIGN_X_BITS(G, 6, G, 5, B, 6, B, 5, R, 6, R, 7, S, 5);
948             break;
949         case 5:
950             ASSIGN_X_BITS(G, 6, G, 5, B, 6, B, 5, R, 6, S, 6, S, 5);
951             break;
952         default:
953             DE_ASSERT(false);
954         }
955 
956 #undef ASSIGN_X_BITS
957 #undef SHOR
958     }
959 
960     static const int shiftAmounts[] = {1, 1, 2, 3, 4, 5};
961     DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
962 
963     red <<= shiftAmounts[mode];
964     green <<= shiftAmounts[mode];
965     blue <<= shiftAmounts[mode];
966     scale <<= shiftAmounts[mode];
967 
968     if (mode != 5)
969     {
970         green = red - green;
971         blue  = red - blue;
972     }
973 
974     if (majComp == 1)
975         std::swap(red, green);
976     else if (majComp == 2)
977         std::swap(red, blue);
978 
979     e0 = UVec4(de::clamp(red - scale, 0, 0xfff), de::clamp(green - scale, 0, 0xfff), de::clamp(blue - scale, 0, 0xfff),
980                0x780);
981 
982     e1 = UVec4(de::clamp(red, 0, 0xfff), de::clamp(green, 0, 0xfff), de::clamp(blue, 0, 0xfff), 0x780);
983 }
984 
decodeHDREndpointMode11(UVec4 & e0,UVec4 & e1,uint32_t v0,uint32_t v1,uint32_t v2,uint32_t v3,uint32_t v4,uint32_t v5)985 void decodeHDREndpointMode11(UVec4 &e0, UVec4 &e1, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4,
986                              uint32_t v5)
987 {
988     const uint32_t major = (getBit(v5, 7) << 1) | getBit(v4, 7);
989 
990     if (major == 3)
991     {
992         e0 = UVec4(v0 << 4, v2 << 4, getBits(v4, 0, 6) << 5, 0x780);
993         e1 = UVec4(v1 << 4, v3 << 4, getBits(v5, 0, 6) << 5, 0x780);
994     }
995     else
996     {
997         const uint32_t mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
998 
999         int32_t a  = (int32_t)((getBit(v1, 6) << 8) | v0);
1000         int32_t c  = (int32_t)(getBits(v1, 0, 5));
1001         int32_t b0 = (int32_t)(getBits(v2, 0, 5));
1002         int32_t b1 = (int32_t)(getBits(v3, 0, 5));
1003         int32_t d0 = (int32_t)(getBits(v4, 0, 4));
1004         int32_t d1 = (int32_t)(getBits(v5, 0, 4));
1005 
1006         {
1007 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
1008 #define ASSIGN_X_BITS(V0, S0, V1, S1, V2, S2, V3, S3, V4, S4, V5, S5) \
1009     do                                                                \
1010     {                                                                 \
1011         SHOR(V0, S0, x0);                                             \
1012         SHOR(V1, S1, x1);                                             \
1013         SHOR(V2, S2, x2);                                             \
1014         SHOR(V3, S3, x3);                                             \
1015         SHOR(V4, S4, x4);                                             \
1016         SHOR(V5, S5, x5);                                             \
1017     } while (false)
1018 
1019             const uint32_t x0 = getBit(v2, 6);
1020             const uint32_t x1 = getBit(v3, 6);
1021             const uint32_t x2 = getBit(v4, 6);
1022             const uint32_t x3 = getBit(v5, 6);
1023             const uint32_t x4 = getBit(v4, 5);
1024             const uint32_t x5 = getBit(v5, 5);
1025 
1026             switch (mode)
1027             {
1028             case 0:
1029                 ASSIGN_X_BITS(b0, 6, b1, 6, d0, 6, d1, 6, d0, 5, d1, 5);
1030                 break;
1031             case 1:
1032                 ASSIGN_X_BITS(b0, 6, b1, 6, b0, 7, b1, 7, d0, 5, d1, 5);
1033                 break;
1034             case 2:
1035                 ASSIGN_X_BITS(a, 9, c, 6, d0, 6, d1, 6, d0, 5, d1, 5);
1036                 break;
1037             case 3:
1038                 ASSIGN_X_BITS(b0, 6, b1, 6, a, 9, c, 6, d0, 5, d1, 5);
1039                 break;
1040             case 4:
1041                 ASSIGN_X_BITS(b0, 6, b1, 6, b0, 7, b1, 7, a, 9, a, 10);
1042                 break;
1043             case 5:
1044                 ASSIGN_X_BITS(a, 9, a, 10, c, 7, c, 6, d0, 5, d1, 5);
1045                 break;
1046             case 6:
1047                 ASSIGN_X_BITS(b0, 6, b1, 6, a, 11, c, 6, a, 9, a, 10);
1048                 break;
1049             case 7:
1050                 ASSIGN_X_BITS(a, 9, a, 10, a, 11, c, 6, d0, 5, d1, 5);
1051                 break;
1052             default:
1053                 DE_ASSERT(false);
1054             }
1055 
1056 #undef ASSIGN_X_BITS
1057 #undef SHOR
1058         }
1059 
1060         static const int numDBits[] = {7, 6, 7, 6, 5, 6, 5, 6};
1061         DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
1062 
1063         d0 = signExtend(d0, numDBits[mode]);
1064         d1 = signExtend(d1, numDBits[mode]);
1065 
1066         const int shiftAmount = (mode >> 1) ^ 3;
1067         a <<= shiftAmount;
1068         c <<= shiftAmount;
1069         b0 <<= shiftAmount;
1070         b1 <<= shiftAmount;
1071         d0 <<= shiftAmount;
1072         d1 <<= shiftAmount;
1073 
1074         e0 = UVec4(de::clamp(a - c, 0, 0xfff), de::clamp(a - b0 - c - d0, 0, 0xfff),
1075                    de::clamp(a - b1 - c - d1, 0, 0xfff), 0x780);
1076 
1077         e1 = UVec4(de::clamp(a, 0, 0xfff), de::clamp(a - b0, 0, 0xfff), de::clamp(a - b1, 0, 0xfff), 0x780);
1078 
1079         if (major == 1)
1080         {
1081             std::swap(e0.x(), e0.y());
1082             std::swap(e1.x(), e1.y());
1083         }
1084         else if (major == 2)
1085         {
1086             std::swap(e0.x(), e0.z());
1087             std::swap(e1.x(), e1.z());
1088         }
1089     }
1090 }
1091 
decodeHDREndpointMode15(UVec4 & e0,UVec4 & e1,uint32_t v0,uint32_t v1,uint32_t v2,uint32_t v3,uint32_t v4,uint32_t v5,uint32_t v6In,uint32_t v7In)1092 void decodeHDREndpointMode15(UVec4 &e0, UVec4 &e1, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4,
1093                              uint32_t v5, uint32_t v6In, uint32_t v7In)
1094 {
1095     decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
1096 
1097     const uint32_t mode = (getBit(v7In, 7) << 1) | getBit(v6In, 7);
1098     int32_t v6          = (int32_t)getBits(v6In, 0, 6);
1099     int32_t v7          = (int32_t)getBits(v7In, 0, 6);
1100 
1101     if (mode == 3)
1102     {
1103         e0.w() = v6 << 5;
1104         e1.w() = v7 << 5;
1105     }
1106     else
1107     {
1108         v6 |= (v7 << (mode + 1)) & 0x780;
1109         v7 &= (0x3f >> mode);
1110         v7 ^= 0x20 >> mode;
1111         v7 -= 0x20 >> mode;
1112         v6 <<= 4 - mode;
1113         v7 <<= 4 - mode;
1114 
1115         v7 += v6;
1116         v7     = de::clamp(v7, 0, 0xfff);
1117         e0.w() = v6;
1118         e1.w() = v7;
1119     }
1120 }
1121 
decodeColorEndpoints(ColorEndpointPair * dst,const uint32_t * unquantizedEndpoints,const uint32_t * endpointModes,int numPartitions)1122 void decodeColorEndpoints(ColorEndpointPair *dst, const uint32_t *unquantizedEndpoints, const uint32_t *endpointModes,
1123                           int numPartitions)
1124 {
1125     int unquantizedNdx = 0;
1126 
1127     for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
1128     {
1129         const uint32_t endpointMode = endpointModes[partitionNdx];
1130         const uint32_t *v           = &unquantizedEndpoints[unquantizedNdx];
1131         UVec4 &e0                   = dst[partitionNdx].e0;
1132         UVec4 &e1                   = dst[partitionNdx].e1;
1133 
1134         unquantizedNdx += computeNumColorEndpointValues(endpointMode);
1135 
1136         switch (endpointMode)
1137         {
1138         case 0:
1139             e0 = UVec4(v[0], v[0], v[0], 0xff);
1140             e1 = UVec4(v[1], v[1], v[1], 0xff);
1141             break;
1142 
1143         case 1:
1144         {
1145             const uint32_t L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
1146             const uint32_t L1 = de::min(0xffu, L0 + getBits(v[1], 0, 5));
1147             e0                = UVec4(L0, L0, L0, 0xff);
1148             e1                = UVec4(L1, L1, L1, 0xff);
1149             break;
1150         }
1151 
1152         case 2:
1153         {
1154             const uint32_t v1Gr = v[1] >= v[0];
1155             const uint32_t y0   = v1Gr ? v[0] << 4 : (v[1] << 4) + 8;
1156             const uint32_t y1   = v1Gr ? v[1] << 4 : (v[0] << 4) - 8;
1157 
1158             e0 = UVec4(y0, y0, y0, 0x780);
1159             e1 = UVec4(y1, y1, y1, 0x780);
1160             break;
1161         }
1162 
1163         case 3:
1164         {
1165             const bool m      = isBitSet(v[0], 7);
1166             const uint32_t y0 = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2) :
1167                                     (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
1168             const uint32_t d  = m ? getBits(v[1], 0, 4) << 2 : getBits(v[1], 0, 3) << 1;
1169             const uint32_t y1 = de::min(0xfffu, y0 + d);
1170 
1171             e0 = UVec4(y0, y0, y0, 0x780);
1172             e1 = UVec4(y1, y1, y1, 0x780);
1173             break;
1174         }
1175 
1176         case 4:
1177             e0 = UVec4(v[0], v[0], v[0], v[2]);
1178             e1 = UVec4(v[1], v[1], v[1], v[3]);
1179             break;
1180 
1181         case 5:
1182         {
1183             int32_t v0 = (int32_t)v[0];
1184             int32_t v1 = (int32_t)v[1];
1185             int32_t v2 = (int32_t)v[2];
1186             int32_t v3 = (int32_t)v[3];
1187             bitTransferSigned(v1, v0);
1188             bitTransferSigned(v3, v2);
1189 
1190             e0 = clampedRGBA(IVec4(v0, v0, v0, v2));
1191             e1 = clampedRGBA(IVec4(v0 + v1, v0 + v1, v0 + v1, v2 + v3));
1192             break;
1193         }
1194 
1195         case 6:
1196             e0 = UVec4((v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8, 0xff);
1197             e1 = UVec4(v[0], v[1], v[2], 0xff);
1198             break;
1199 
1200         case 7:
1201             decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
1202             break;
1203 
1204         case 8:
1205             if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4])
1206             {
1207                 e0 = UVec4(v[0], v[2], v[4], 0xff);
1208                 e1 = UVec4(v[1], v[3], v[5], 0xff);
1209             }
1210             else
1211             {
1212                 e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
1213                 e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
1214             }
1215             break;
1216 
1217         case 9:
1218         {
1219             int32_t v0 = (int32_t)v[0];
1220             int32_t v1 = (int32_t)v[1];
1221             int32_t v2 = (int32_t)v[2];
1222             int32_t v3 = (int32_t)v[3];
1223             int32_t v4 = (int32_t)v[4];
1224             int32_t v5 = (int32_t)v[5];
1225             bitTransferSigned(v1, v0);
1226             bitTransferSigned(v3, v2);
1227             bitTransferSigned(v5, v4);
1228 
1229             if (v1 + v3 + v5 >= 0)
1230             {
1231                 e0 = clampedRGBA(IVec4(v0, v2, v4, 0xff));
1232                 e1 = clampedRGBA(IVec4(v0 + v1, v2 + v3, v4 + v5, 0xff));
1233             }
1234             else
1235             {
1236                 e0 = clampedRGBA(blueContract(v0 + v1, v2 + v3, v4 + v5, 0xff));
1237                 e1 = clampedRGBA(blueContract(v0, v2, v4, 0xff));
1238             }
1239             break;
1240         }
1241 
1242         case 10:
1243             e0 = UVec4((v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8, v[4]);
1244             e1 = UVec4(v[0], v[1], v[2], v[5]);
1245             break;
1246 
1247         case 11:
1248             decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
1249             break;
1250 
1251         case 12:
1252             if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4])
1253             {
1254                 e0 = UVec4(v[0], v[2], v[4], v[6]);
1255                 e1 = UVec4(v[1], v[3], v[5], v[7]);
1256             }
1257             else
1258             {
1259                 e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
1260                 e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
1261             }
1262             break;
1263 
1264         case 13:
1265         {
1266             int32_t v0 = (int32_t)v[0];
1267             int32_t v1 = (int32_t)v[1];
1268             int32_t v2 = (int32_t)v[2];
1269             int32_t v3 = (int32_t)v[3];
1270             int32_t v4 = (int32_t)v[4];
1271             int32_t v5 = (int32_t)v[5];
1272             int32_t v6 = (int32_t)v[6];
1273             int32_t v7 = (int32_t)v[7];
1274             bitTransferSigned(v1, v0);
1275             bitTransferSigned(v3, v2);
1276             bitTransferSigned(v5, v4);
1277             bitTransferSigned(v7, v6);
1278 
1279             if (v1 + v3 + v5 >= 0)
1280             {
1281                 e0 = clampedRGBA(IVec4(v0, v2, v4, v6));
1282                 e1 = clampedRGBA(IVec4(v0 + v1, v2 + v3, v4 + v5, v6 + v7));
1283             }
1284             else
1285             {
1286                 e0 = clampedRGBA(blueContract(v0 + v1, v2 + v3, v4 + v5, v6 + v7));
1287                 e1 = clampedRGBA(blueContract(v0, v2, v4, v6));
1288             }
1289 
1290             break;
1291         }
1292 
1293         case 14:
1294             decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
1295             e0.w() = v[6];
1296             e1.w() = v[7];
1297             break;
1298 
1299         case 15:
1300             decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
1301             break;
1302 
1303         default:
1304             DE_ASSERT(false);
1305         }
1306     }
1307 }
1308 
computeColorEndpoints(ColorEndpointPair * dst,const Block128 & blockData,const uint32_t * endpointModes,int numPartitions,int numColorEndpointValues,const ISEParams & iseParams,int numBitsAvailable)1309 void computeColorEndpoints(ColorEndpointPair *dst, const Block128 &blockData, const uint32_t *endpointModes,
1310                            int numPartitions, int numColorEndpointValues, const ISEParams &iseParams,
1311                            int numBitsAvailable)
1312 {
1313     const int colorEndpointDataStart = numPartitions == 1 ? 17 : 29;
1314     ISEDecodedResult colorEndpointData[18];
1315 
1316     {
1317         BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
1318         decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
1319     }
1320 
1321     {
1322         uint32_t unquantizedEndpoints[18];
1323         unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
1324         decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
1325     }
1326 }
1327 
unquantizeWeights(uint32_t dst[64],const ISEDecodedResult * weightGrid,const ASTCBlockMode & blockMode)1328 void unquantizeWeights(uint32_t dst[64], const ISEDecodedResult *weightGrid, const ASTCBlockMode &blockMode)
1329 {
1330     const int numWeights       = computeNumWeights(blockMode);
1331     const ISEParams &iseParams = blockMode.weightISEParams;
1332 
1333     if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
1334     {
1335         const int rangeCase = iseParams.numBits * 2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
1336 
1337         if (rangeCase == 0 || rangeCase == 1)
1338         {
1339             static const uint32_t map0[3] = {0, 32, 63};
1340             static const uint32_t map1[5] = {0, 16, 32, 47, 63};
1341             const uint32_t *const map     = rangeCase == 0 ? &map0[0] : &map1[0];
1342             for (int i = 0; i < numWeights; i++)
1343             {
1344                 DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
1345                 dst[i] = map[weightGrid[i].v];
1346             }
1347         }
1348         else
1349         {
1350             DE_ASSERT(rangeCase <= 6);
1351             static const uint32_t Ca[5] = {50, 28, 23, 13, 11};
1352             const uint32_t C            = Ca[rangeCase - 2];
1353 
1354             for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1355             {
1356                 const uint32_t a = getBit(weightGrid[weightNdx].m, 0);
1357                 const uint32_t b = getBit(weightGrid[weightNdx].m, 1);
1358                 const uint32_t c = getBit(weightGrid[weightNdx].m, 2);
1359 
1360                 const uint32_t A = a == 0 ? 0 : (1 << 7) - 1;
1361                 const uint32_t B = rangeCase == 2 ? 0 :
1362                                    rangeCase == 3 ? 0 :
1363                                    rangeCase == 4 ? (b << 6) | (b << 2) | (b << 0) :
1364                                    rangeCase == 5 ? (b << 6) | (b << 1) :
1365                                    rangeCase == 6 ? (c << 6) | (b << 5) | (c << 1) | (b << 0) :
1366                                                     (uint32_t)-1;
1367 
1368                 dst[weightNdx] = (((weightGrid[weightNdx].tq * C + B) ^ A) >> 2) | (A & 0x20);
1369             }
1370         }
1371     }
1372     else
1373     {
1374         DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
1375 
1376         for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1377             dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
1378     }
1379 
1380     for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1381         dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
1382 
1383     // Initialize nonexistent weights to poison values
1384     for (int weightNdx = numWeights; weightNdx < 64; weightNdx++)
1385         dst[weightNdx] = ~0u;
1386 }
1387 
interpolateWeights(TexelWeightPair * dst,const uint32_t (& unquantizedWeights)[64],int blockWidth,int blockHeight,const ASTCBlockMode & blockMode)1388 void interpolateWeights(TexelWeightPair *dst, const uint32_t (&unquantizedWeights)[64], int blockWidth, int blockHeight,
1389                         const ASTCBlockMode &blockMode)
1390 {
1391     const int numWeightsPerTexel = blockMode.isDualPlane ? 2 : 1;
1392     const uint32_t scaleX        = (1024 + blockWidth / 2) / (blockWidth - 1);
1393     const uint32_t scaleY        = (1024 + blockHeight / 2) / (blockHeight - 1);
1394 
1395     DE_ASSERT(blockMode.weightGridWidth * blockMode.weightGridHeight * numWeightsPerTexel <=
1396               DE_LENGTH_OF_ARRAY(unquantizedWeights));
1397 
1398     for (int texelY = 0; texelY < blockHeight; texelY++)
1399     {
1400         for (int texelX = 0; texelX < blockWidth; texelX++)
1401         {
1402             const uint32_t gX = (scaleX * texelX * (blockMode.weightGridWidth - 1) + 32) >> 6;
1403             const uint32_t gY = (scaleY * texelY * (blockMode.weightGridHeight - 1) + 32) >> 6;
1404             const uint32_t jX = gX >> 4;
1405             const uint32_t jY = gY >> 4;
1406             const uint32_t fX = gX & 0xf;
1407             const uint32_t fY = gY & 0xf;
1408 
1409             const uint32_t w11 = (fX * fY + 8) >> 4;
1410             const uint32_t w10 = fY - w11;
1411             const uint32_t w01 = fX - w11;
1412             const uint32_t w00 = 16 - fX - fY + w11;
1413 
1414             const uint32_t i00 = jY * blockMode.weightGridWidth + jX;
1415             const uint32_t i01 = i00 + 1;
1416             const uint32_t i10 = i00 + blockMode.weightGridWidth;
1417             const uint32_t i11 = i00 + blockMode.weightGridWidth + 1;
1418 
1419             // These addresses can be out of bounds, but respective weights will be 0 then.
1420             DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w00 == 0);
1421             DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w01 == 0);
1422             DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w10 == 0);
1423             DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w11 == 0);
1424 
1425             for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
1426             {
1427                 // & 0x3f clamps address to bounds of unquantizedWeights
1428                 const uint32_t p00 = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1429                 const uint32_t p01 = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1430                 const uint32_t p10 = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1431                 const uint32_t p11 = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1432 
1433                 dst[texelY * blockWidth + texelX].w[texelWeightNdx] =
1434                     (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
1435             }
1436         }
1437     }
1438 }
1439 
computeTexelWeights(TexelWeightPair * dst,const Block128 & blockData,int blockWidth,int blockHeight,const ASTCBlockMode & blockMode)1440 void computeTexelWeights(TexelWeightPair *dst, const Block128 &blockData, int blockWidth, int blockHeight,
1441                          const ASTCBlockMode &blockMode)
1442 {
1443     ISEDecodedResult weightGrid[64];
1444 
1445     {
1446         BitAccessStream dataStream(
1447             blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
1448         decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
1449     }
1450 
1451     {
1452         uint32_t unquantizedWeights[64];
1453         unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
1454         interpolateWeights(dst, unquantizedWeights, blockWidth, blockHeight, blockMode);
1455     }
1456 }
1457 
// 32-bit integer mixing function built from shifts, adds, subtracts and xors; all arithmetic
// wraps modulo 2^32. Zero maps to zero.
inline uint32_t hash52(uint32_t v)
{
    v = v ^ (v >> 15);
    v = v - (v << 17);
    v = v + (v << 7);
    v = v + (v << 4);
    v = v ^ (v >> 5);
    v = v + (v << 16);
    v = v ^ (v >> 7);
    v = v ^ (v >> 3);
    v = v ^ (v << 6);
    v = v ^ (v >> 17);

    return v;
}
1473 
computeTexelPartition(uint32_t seedIn,uint32_t xIn,uint32_t yIn,uint32_t zIn,int numPartitions,bool smallBlock)1474 int computeTexelPartition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int numPartitions, bool smallBlock)
1475 {
1476     DE_ASSERT(zIn == 0);
1477     const uint32_t x    = smallBlock ? xIn << 1 : xIn;
1478     const uint32_t y    = smallBlock ? yIn << 1 : yIn;
1479     const uint32_t z    = smallBlock ? zIn << 1 : zIn;
1480     const uint32_t seed = seedIn + 1024 * (numPartitions - 1);
1481     const uint32_t rnum = hash52(seed);
1482     uint8_t seed1       = (uint8_t)(rnum & 0xf);
1483     uint8_t seed2       = (uint8_t)((rnum >> 4) & 0xf);
1484     uint8_t seed3       = (uint8_t)((rnum >> 8) & 0xf);
1485     uint8_t seed4       = (uint8_t)((rnum >> 12) & 0xf);
1486     uint8_t seed5       = (uint8_t)((rnum >> 16) & 0xf);
1487     uint8_t seed6       = (uint8_t)((rnum >> 20) & 0xf);
1488     uint8_t seed7       = (uint8_t)((rnum >> 24) & 0xf);
1489     uint8_t seed8       = (uint8_t)((rnum >> 28) & 0xf);
1490     uint8_t seed9       = (uint8_t)((rnum >> 18) & 0xf);
1491     uint8_t seed10      = (uint8_t)((rnum >> 22) & 0xf);
1492     uint8_t seed11      = (uint8_t)((rnum >> 26) & 0xf);
1493     uint8_t seed12      = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
1494 
1495     seed1  = (uint8_t)(seed1 * seed1);
1496     seed2  = (uint8_t)(seed2 * seed2);
1497     seed3  = (uint8_t)(seed3 * seed3);
1498     seed4  = (uint8_t)(seed4 * seed4);
1499     seed5  = (uint8_t)(seed5 * seed5);
1500     seed6  = (uint8_t)(seed6 * seed6);
1501     seed7  = (uint8_t)(seed7 * seed7);
1502     seed8  = (uint8_t)(seed8 * seed8);
1503     seed9  = (uint8_t)(seed9 * seed9);
1504     seed10 = (uint8_t)(seed10 * seed10);
1505     seed11 = (uint8_t)(seed11 * seed11);
1506     seed12 = (uint8_t)(seed12 * seed12);
1507 
1508     const int shA = (seed & 2) != 0 ? 4 : 5;
1509     const int shB = numPartitions == 3 ? 6 : 5;
1510     const int sh1 = (seed & 1) != 0 ? shA : shB;
1511     const int sh2 = (seed & 1) != 0 ? shB : shA;
1512     const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
1513 
1514     seed1  = (uint8_t)(seed1 >> sh1);
1515     seed2  = (uint8_t)(seed2 >> sh2);
1516     seed3  = (uint8_t)(seed3 >> sh1);
1517     seed4  = (uint8_t)(seed4 >> sh2);
1518     seed5  = (uint8_t)(seed5 >> sh1);
1519     seed6  = (uint8_t)(seed6 >> sh2);
1520     seed7  = (uint8_t)(seed7 >> sh1);
1521     seed8  = (uint8_t)(seed8 >> sh2);
1522     seed9  = (uint8_t)(seed9 >> sh3);
1523     seed10 = (uint8_t)(seed10 >> sh3);
1524     seed11 = (uint8_t)(seed11 >> sh3);
1525     seed12 = (uint8_t)(seed12 >> sh3);
1526 
1527     const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
1528     const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
1529     const int c = numPartitions >= 3 ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
1530     const int d = numPartitions >= 4 ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
1531 
1532     return a >= b && a >= c && a >= d ? 0 : b >= c && b >= d ? 1 : c >= d ? 2 : 3;
1533 }
1534 
setTexelColors(void * dst,ColorEndpointPair * colorEndpoints,TexelWeightPair * texelWeights,int ccs,uint32_t partitionIndexSeed,int numPartitions,int blockWidth,int blockHeight,bool isSRGB,bool isLDRMode,const uint32_t * colorEndpointModes)1535 DecompressResult setTexelColors(void *dst, ColorEndpointPair *colorEndpoints, TexelWeightPair *texelWeights, int ccs,
1536                                 uint32_t partitionIndexSeed, int numPartitions, int blockWidth, int blockHeight,
1537                                 bool isSRGB, bool isLDRMode, const uint32_t *colorEndpointModes)
1538 {
1539     const bool smallBlock   = blockWidth * blockHeight < 31;
1540     DecompressResult result = DECOMPRESS_RESULT_VALID_BLOCK;
1541     bool isHDREndpoint[4];
1542 
1543     for (int i = 0; i < numPartitions; i++)
1544         isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
1545 
1546     for (int texelY = 0; texelY < blockHeight; texelY++)
1547         for (int texelX = 0; texelX < blockWidth; texelX++)
1548         {
1549             const int texelNdx         = texelY * blockWidth + texelX;
1550             const int colorEndpointNdx = numPartitions == 1 ? 0 :
1551                                                               computeTexelPartition(partitionIndexSeed, texelX, texelY,
1552                                                                                     0, numPartitions, smallBlock);
1553             DE_ASSERT(colorEndpointNdx < numPartitions);
1554             const UVec4 &e0               = colorEndpoints[colorEndpointNdx].e0;
1555             const UVec4 &e1               = colorEndpoints[colorEndpointNdx].e1;
1556             const TexelWeightPair &weight = texelWeights[texelNdx];
1557 
1558             if (isLDRMode && isHDREndpoint[colorEndpointNdx])
1559             {
1560                 if (isSRGB)
1561                 {
1562                     ((uint8_t *)dst)[texelNdx * 4 + 0] = 0xff;
1563                     ((uint8_t *)dst)[texelNdx * 4 + 1] = 0;
1564                     ((uint8_t *)dst)[texelNdx * 4 + 2] = 0xff;
1565                     ((uint8_t *)dst)[texelNdx * 4 + 3] = 0xff;
1566                 }
1567                 else
1568                 {
1569                     ((float *)dst)[texelNdx * 4 + 0] = 1.0f;
1570                     ((float *)dst)[texelNdx * 4 + 1] = 0;
1571                     ((float *)dst)[texelNdx * 4 + 2] = 1.0f;
1572                     ((float *)dst)[texelNdx * 4 + 3] = 1.0f;
1573                 }
1574 
1575                 result = DECOMPRESS_RESULT_ERROR;
1576             }
1577             else
1578             {
1579                 for (int channelNdx = 0; channelNdx < 4; channelNdx++)
1580                 {
1581                     if (!isHDREndpoint[colorEndpointNdx] ||
1582                         (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] ==
1583                                                 14)) // \note Alpha for mode 14 is treated the same as LDR.
1584                     {
1585                         const uint32_t c0 = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
1586                         const uint32_t c1 = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
1587                         const uint32_t w  = weight.w[ccs == channelNdx ? 1 : 0];
1588                         const uint32_t c  = (c0 * (64 - w) + c1 * w + 32) / 64;
1589 
1590                         if (isSRGB)
1591                             ((uint8_t *)dst)[texelNdx * 4 + channelNdx] = (uint8_t)((c & 0xff00) >> 8);
1592                         else
1593                             ((float *)dst)[texelNdx * 4 + channelNdx] = c == 65535 ? 1.0f : (float)c / 65536.0f;
1594                     }
1595                     else
1596                     {
1597                         DE_STATIC_ASSERT((de::meta::TypesSame<deFloat16, uint16_t>::Value));
1598                         const uint32_t c0  = e0[channelNdx] << 4;
1599                         const uint32_t c1  = e1[channelNdx] << 4;
1600                         const uint32_t w   = weight.w[ccs == channelNdx ? 1 : 0];
1601                         const uint32_t c   = (c0 * (64 - w) + c1 * w + 32) / 64;
1602                         const uint32_t e   = getBits(c, 11, 15);
1603                         const uint32_t m   = getBits(c, 0, 10);
1604                         const uint32_t mt  = m < 512 ? 3 * m : m >= 1536 ? 5 * m - 2048 : 4 * m - 512;
1605                         const deFloat16 cf = (deFloat16)((e << 10) + (mt >> 3));
1606 
1607                         ((float *)dst)[texelNdx * 4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
1608                     }
1609                 }
1610             }
1611         }
1612 
1613     return result;
1614 }
1615 
decompressBlock(void * dst,const Block128 & blockData,int blockWidth,int blockHeight,bool isSRGB,bool isLDR)1616 DecompressResult decompressBlock(void *dst, const Block128 &blockData, int blockWidth, int blockHeight, bool isSRGB,
1617                                  bool isLDR)
1618 {
1619     DE_ASSERT(isLDR || !isSRGB);
1620 
1621     // Decode block mode.
1622 
1623     const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
1624 
1625     // Check for block mode errors.
1626 
1627     if (blockMode.isError)
1628     {
1629         setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1630         return DECOMPRESS_RESULT_ERROR;
1631     }
1632 
1633     // Separate path for void-extent.
1634 
1635     if (blockMode.isVoidExtent)
1636         return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
1637 
1638     // Compute weight grid values.
1639 
1640     const int numWeights        = computeNumWeights(blockMode);
1641     const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
1642     const int numPartitions     = (int)blockData.getBits(11, 12) + 1;
1643 
1644     // Check for errors in weight grid, partition and dual-plane parameters.
1645 
1646     if (numWeights > 64 || numWeightDataBits > 96 || numWeightDataBits < 24 || blockMode.weightGridWidth > blockWidth ||
1647         blockMode.weightGridHeight > blockHeight || (numPartitions == 4 && blockMode.isDualPlane))
1648     {
1649         setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1650         return DECOMPRESS_RESULT_ERROR;
1651     }
1652 
1653     // Compute number of bits available for color endpoint data.
1654 
1655     const bool isSingleUniqueCem = numPartitions == 1 || blockData.getBits(23, 24) == 0;
1656     const int numConfigDataBits  = (numPartitions == 1 ? 17 :
1657                                     isSingleUniqueCem  ? 29 :
1658                                                          25 + 3 * numPartitions) +
1659                                   (blockMode.isDualPlane ? 2 : 0);
1660     const int numBitsForColorEndpoints = 128 - numWeightDataBits - numConfigDataBits;
1661     const int extraCemBitsStart        = 127 - numWeightDataBits -
1662                                   (isSingleUniqueCem  ? -1 :
1663                                    numPartitions == 4 ? 7 :
1664                                    numPartitions == 3 ? 4 :
1665                                    numPartitions == 2 ? 1 :
1666                                                         0);
1667     // Decode color endpoint modes.
1668 
1669     uint32_t colorEndpointModes[4];
1670     decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
1671 
1672     const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
1673 
1674     // Check for errors in color endpoint value count.
1675 
1676     if (numColorEndpointValues > 18 || numBitsForColorEndpoints < deDivRoundUp32(13 * numColorEndpointValues, 5))
1677     {
1678         setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1679         return DECOMPRESS_RESULT_ERROR;
1680     }
1681 
1682     // Compute color endpoints.
1683 
1684     ColorEndpointPair colorEndpoints[4];
1685     computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
1686                           computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues),
1687                           numBitsForColorEndpoints);
1688 
1689     // Compute texel weights.
1690 
1691     TexelWeightPair texelWeights[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT];
1692     computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
1693 
1694     // Set texel colors.
1695 
1696     const int ccs = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart - 2, extraCemBitsStart - 1) : -1;
1697     const uint32_t partitionIndexSeed = numPartitions > 1 ? blockData.getBits(13, 22) : (uint32_t)-1;
1698 
1699     return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth,
1700                           blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
1701 }
1702 
decompress(const PixelBufferAccess & dst,const uint8_t * data,bool isSRGB,bool isLDR)1703 void decompress(const PixelBufferAccess &dst, const uint8_t *data, bool isSRGB, bool isLDR)
1704 {
1705     DE_ASSERT(isLDR || !isSRGB);
1706 
1707     const int blockWidth  = dst.getWidth();
1708     const int blockHeight = dst.getHeight();
1709 
1710     union
1711     {
1712         uint8_t sRGB[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
1713         float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
1714     } decompressedBuffer;
1715 
1716     const Block128 blockData(data);
1717     decompressBlock(isSRGB ? (void *)&decompressedBuffer.sRGB[0] : (void *)&decompressedBuffer.linear[0], blockData,
1718                     dst.getWidth(), dst.getHeight(), isSRGB, isLDR);
1719 
1720     if (isSRGB)
1721     {
1722         for (int i = 0; i < blockHeight; i++)
1723             for (int j = 0; j < blockWidth; j++)
1724             {
1725                 dst.setPixel(IVec4(decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 0],
1726                                    decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 1],
1727                                    decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 2],
1728                                    decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 3]),
1729                              j, i);
1730             }
1731     }
1732     else
1733     {
1734         for (int i = 0; i < blockHeight; i++)
1735             for (int j = 0; j < blockWidth; j++)
1736             {
1737                 dst.setPixel(Vec4(decompressedBuffer.linear[(i * blockWidth + j) * 4 + 0],
1738                                   decompressedBuffer.linear[(i * blockWidth + j) * 4 + 1],
1739                                   decompressedBuffer.linear[(i * blockWidth + j) * 4 + 2],
1740                                   decompressedBuffer.linear[(i * blockWidth + j) * 4 + 3]),
1741                              j, i);
1742             }
1743     }
1744 }
1745 
1746 // Helper class for setting bits in a 128-bit block.
1747 class AssignBlock128
1748 {
1749 private:
1750     typedef uint64_t Word;
1751 
1752     enum
1753     {
1754         WORD_BYTES = sizeof(Word),
1755         WORD_BITS  = 8 * WORD_BYTES,
1756         NUM_WORDS  = 128 / WORD_BITS
1757     };
1758 
1759     DE_STATIC_ASSERT(128 % WORD_BITS == 0);
1760 
1761 public:
AssignBlock128(void)1762     AssignBlock128(void)
1763     {
1764         for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1765             m_words[wordNdx] = 0;
1766     }
1767 
setBit(int ndx,uint32_t val)1768     void setBit(int ndx, uint32_t val)
1769     {
1770         DE_ASSERT(de::inBounds(ndx, 0, 128));
1771         DE_ASSERT((val & 1) == val);
1772         const int wordNdx = ndx / WORD_BITS;
1773         const int bitNdx  = ndx % WORD_BITS;
1774         m_words[wordNdx]  = (m_words[wordNdx] & ~((Word)1 << bitNdx)) | ((Word)val << bitNdx);
1775     }
1776 
setBits(int low,int high,uint32_t bits)1777     void setBits(int low, int high, uint32_t bits)
1778     {
1779         DE_ASSERT(de::inBounds(low, 0, 128));
1780         DE_ASSERT(de::inBounds(high, 0, 128));
1781         DE_ASSERT(de::inRange(high - low + 1, 0, 32));
1782         DE_ASSERT((bits & (((Word)1 << (high - low + 1)) - 1)) == bits);
1783 
1784         if (high - low + 1 == 0)
1785             return;
1786 
1787         const int word0Ndx   = low / WORD_BITS;
1788         const int word1Ndx   = high / WORD_BITS;
1789         const int lowNdxInW0 = low % WORD_BITS;
1790 
1791         if (word0Ndx == word1Ndx)
1792             m_words[word0Ndx] =
1793                 (m_words[word0Ndx] & ~((((Word)1 << (high - low + 1)) - 1) << lowNdxInW0)) | ((Word)bits << lowNdxInW0);
1794         else
1795         {
1796             DE_ASSERT(word1Ndx == word0Ndx + 1);
1797 
1798             const int highNdxInW1      = high % WORD_BITS;
1799             const int numBitsToSetInW0 = WORD_BITS - lowNdxInW0;
1800             const Word bitsLowMask     = ((Word)1 << numBitsToSetInW0) - 1;
1801 
1802             m_words[word0Ndx] =
1803                 (m_words[word0Ndx] & (((Word)1 << lowNdxInW0) - 1)) | (((Word)bits & bitsLowMask) << lowNdxInW0);
1804             m_words[word1Ndx] = (m_words[word1Ndx] & ~(((Word)1 << (highNdxInW1 + 1)) - 1)) |
1805                                 (((Word)bits & ~bitsLowMask) >> numBitsToSetInW0);
1806         }
1807     }
1808 
assignToMemory(uint8_t * dst) const1809     void assignToMemory(uint8_t *dst) const
1810     {
1811         for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1812         {
1813             for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
1814                 dst[wordNdx * WORD_BYTES + byteNdx] = (uint8_t)((m_words[wordNdx] >> (8 * byteNdx)) & 0xff);
1815         }
1816     }
1817 
pushBytesToVector(vector<uint8_t> & dst) const1818     void pushBytesToVector(vector<uint8_t> &dst) const
1819     {
1820         const int assignStartIndex = (int)dst.size();
1821         dst.resize(dst.size() + BLOCK_SIZE_BYTES);
1822         assignToMemory(&dst[assignStartIndex]);
1823     }
1824 
1825 private:
1826     Word m_words[NUM_WORDS];
1827 };
1828 
1829 // A helper for sequential access into a AssignBlock128.
1830 class BitAssignAccessStream
1831 {
1832 public:
BitAssignAccessStream(AssignBlock128 & dst,int startNdxInSrc,int length,bool forward)1833     BitAssignAccessStream(AssignBlock128 &dst, int startNdxInSrc, int length, bool forward)
1834         : m_dst(dst)
1835         , m_startNdxInSrc(startNdxInSrc)
1836         , m_length(length)
1837         , m_forward(forward)
1838         , m_ndx(0)
1839     {
1840     }
1841 
1842     // Set the next num bits. Bits at positions greater than or equal to m_length are not touched.
setNext(int num,uint32_t bits)1843     void setNext(int num, uint32_t bits)
1844     {
1845         DE_ASSERT((bits & (((uint64_t)1 << num) - 1)) == bits);
1846 
1847         if (num == 0 || m_ndx >= m_length)
1848             return;
1849 
1850         const int end             = m_ndx + num;
1851         const int numBitsToDst    = de::max(0, de::min(m_length, end) - m_ndx);
1852         const int low             = m_ndx;
1853         const int high            = m_ndx + numBitsToDst - 1;
1854         const uint32_t actualBits = getBits(bits, 0, numBitsToDst - 1);
1855 
1856         m_ndx += num;
1857 
1858         return m_forward ?
1859                    m_dst.setBits(m_startNdxInSrc + low, m_startNdxInSrc + high, actualBits) :
1860                    m_dst.setBits(m_startNdxInSrc - high, m_startNdxInSrc - low, reverseBits(actualBits, numBitsToDst));
1861     }
1862 
1863 private:
1864     AssignBlock128 &m_dst;
1865     const int m_startNdxInSrc;
1866     const int m_length;
1867     const bool m_forward;
1868 
1869     int m_ndx;
1870 };
1871 
1872 struct VoidExtentParams
1873 {
1874     DE_STATIC_ASSERT((de::meta::TypesSame<deFloat16, uint16_t>::Value));
1875     bool isHDR;
1876     uint16_t r;
1877     uint16_t g;
1878     uint16_t b;
1879     uint16_t a;
1880     // \note Currently extent coordinates are all set to all-ones.
1881 
VoidExtentParamstcu::astc::__anone0fc7f7b0111::VoidExtentParams1882     VoidExtentParams(bool isHDR_, uint16_t r_, uint16_t g_, uint16_t b_, uint16_t a_)
1883         : isHDR(isHDR_)
1884         , r(r_)
1885         , g(g_)
1886         , b(b_)
1887         , a(a_)
1888     {
1889     }
1890 };
1891 
generateVoidExtentBlock(const VoidExtentParams & params)1892 static AssignBlock128 generateVoidExtentBlock(const VoidExtentParams &params)
1893 {
1894     AssignBlock128 block;
1895 
1896     block.setBits(0, 8, 0x1fc); // \note Marks void-extent block.
1897     block.setBit(9, params.isHDR);
1898     block.setBits(10, 11, 3); // \note Spec shows that these bits are both set, although they serve no purpose.
1899 
1900     // Extent coordinates - currently all-ones.
1901     block.setBits(12, 24, 0x1fff);
1902     block.setBits(25, 37, 0x1fff);
1903     block.setBits(38, 50, 0x1fff);
1904     block.setBits(51, 63, 0x1fff);
1905 
1906     DE_ASSERT(!params.isHDR || (!isFloat16InfOrNan(params.r) && !isFloat16InfOrNan(params.g) &&
1907                                 !isFloat16InfOrNan(params.b) && !isFloat16InfOrNan(params.a)));
1908 
1909     block.setBits(64, 79, params.r);
1910     block.setBits(80, 95, params.g);
1911     block.setBits(96, 111, params.b);
1912     block.setBits(112, 127, params.a);
1913 
1914     return block;
1915 }
1916 
// An input array of ISE inputs for an entire ASTC block. Can be given as either single values in the
// range [0, maximumValueOfISERange] or as explicit block value specifications. The latter is needed
// so we can test all possible values of T and Q in a block, since multiple T or Q values may map
// to the same set of decoded values.
struct ISEInput
{
    // Explicit specification of one trit or quint ISE block.
    struct Block
    {
        uint32_t tOrQValue; //!< The 8-bit T or 7-bit Q in a trit or quint ISE block.
        uint32_t bitValues[5];
    };

    // Selects the representation held in value; presumably value.block when true and value.plain
    // when false (confirm against the code that consumes ISEInput).
    bool isGivenInBlockForm;

    // \note 64 comes from the maximum number of weight values in an ASTC block.
    union
    {
        uint32_t plain[64];
        Block block[64];
    } value;

    // \note The contents of value are left uninitialized.
    ISEInput() : isGivenInBlockForm(false)
    {
    }
};
1941 
computeISERangeMax(const ISEParams & iseParams)1942 static inline uint32_t computeISERangeMax(const ISEParams &iseParams)
1943 {
1944     switch (iseParams.mode)
1945     {
1946     case ISEMODE_TRIT:
1947         return (1u << iseParams.numBits) * 3 - 1;
1948     case ISEMODE_QUINT:
1949         return (1u << iseParams.numBits) * 5 - 1;
1950     case ISEMODE_PLAIN_BIT:
1951         return (1u << iseParams.numBits) - 1;
1952     default:
1953         DE_ASSERT(false);
1954         return -1;
1955     }
1956 }
1957 
1958 struct NormalBlockParams
1959 {
1960     int weightGridWidth;
1961     int weightGridHeight;
1962     ISEParams weightISEParams;
1963     bool isDualPlane;
1964     uint32_t ccs; //! \note Irrelevant if !isDualPlane.
1965     int numPartitions;
1966     uint32_t colorEndpointModes[4];
1967     // \note Below members are irrelevant if numPartitions == 1.
1968     bool isMultiPartSingleCemMode; //! \note If true, the single CEM is at colorEndpointModes[0].
1969     uint32_t partitionSeed;
1970 
NormalBlockParamstcu::astc::__anone0fc7f7b0111::NormalBlockParams1971     NormalBlockParams(void)
1972         : weightGridWidth(-1)
1973         , weightGridHeight(-1)
1974         , weightISEParams(ISEMODE_LAST, -1)
1975         , isDualPlane(true)
1976         , ccs((uint32_t)-1)
1977         , numPartitions(-1)
1978         , isMultiPartSingleCemMode(false)
1979         , partitionSeed((uint32_t)-1)
1980     {
1981         colorEndpointModes[0] = 0;
1982         colorEndpointModes[1] = 0;
1983         colorEndpointModes[2] = 0;
1984         colorEndpointModes[3] = 0;
1985     }
1986 };
1987 
1988 struct NormalBlockISEInputs
1989 {
1990     ISEInput weight;
1991     ISEInput endpoint;
1992 
NormalBlockISEInputstcu::astc::__anone0fc7f7b0111::NormalBlockISEInputs1993     NormalBlockISEInputs(void) : weight(), endpoint()
1994     {
1995     }
1996 };
1997 
computeNumWeights(const NormalBlockParams & params)1998 static inline int computeNumWeights(const NormalBlockParams &params)
1999 {
2000     return params.weightGridWidth * params.weightGridHeight * (params.isDualPlane ? 2 : 1);
2001 }
2002 
computeNumBitsForColorEndpoints(const NormalBlockParams & params)2003 static inline int computeNumBitsForColorEndpoints(const NormalBlockParams &params)
2004 {
2005     const int numWeightBits     = computeNumRequiredBits(params.weightISEParams, computeNumWeights(params));
2006     const int numConfigDataBits = (params.numPartitions == 1       ? 17 :
2007                                    params.isMultiPartSingleCemMode ? 29 :
2008                                                                      25 + 3 * params.numPartitions) +
2009                                   (params.isDualPlane ? 2 : 0);
2010 
2011     return 128 - numWeightBits - numConfigDataBits;
2012 }
2013 
computeNumColorEndpointValues(const uint32_t * endpointModes,int numPartitions,bool isMultiPartSingleCemMode)2014 static inline int computeNumColorEndpointValues(const uint32_t *endpointModes, int numPartitions,
2015                                                 bool isMultiPartSingleCemMode)
2016 {
2017     if (isMultiPartSingleCemMode)
2018         return numPartitions * computeNumColorEndpointValues(endpointModes[0]);
2019     else
2020     {
2021         int result = 0;
2022         for (int i = 0; i < numPartitions; i++)
2023             result += computeNumColorEndpointValues(endpointModes[i]);
2024         return result;
2025     }
2026 }
2027 
isValidBlockParams(const NormalBlockParams & params,int blockWidth,int blockHeight)2028 static inline bool isValidBlockParams(const NormalBlockParams &params, int blockWidth, int blockHeight)
2029 {
2030     const int numWeights             = computeNumWeights(params);
2031     const int numWeightBits          = computeNumRequiredBits(params.weightISEParams, numWeights);
2032     const int numColorEndpointValues = computeNumColorEndpointValues(
2033         &params.colorEndpointModes[0], params.numPartitions, params.isMultiPartSingleCemMode);
2034     const int numBitsForColorEndpoints = computeNumBitsForColorEndpoints(params);
2035 
2036     return numWeights <= 64 && de::inRange(numWeightBits, 24, 96) && params.weightGridWidth <= blockWidth &&
2037            params.weightGridHeight <= blockHeight && !(params.numPartitions == 4 && params.isDualPlane) &&
2038            numColorEndpointValues <= 18 && numBitsForColorEndpoints >= deDivRoundUp32(13 * numColorEndpointValues, 5);
2039 }
2040 
2041 // Write bits 0 to 10 of an ASTC block.
writeBlockMode(AssignBlock128 & dst,const NormalBlockParams & blockParams)2042 static void writeBlockMode(AssignBlock128 &dst, const NormalBlockParams &blockParams)
2043 {
2044     const uint32_t d = blockParams.isDualPlane != 0;
2045     // r and h initialized in switch below.
2046     uint32_t r;
2047     uint32_t h;
2048     // a, b and blockModeLayoutNdx initialized in block mode layout index detecting loop below.
2049     uint32_t a = (uint32_t)-1;
2050     uint32_t b = (uint32_t)-1;
2051     int blockModeLayoutNdx;
2052 
2053     // Find the values of r and h (ISE range).
2054     switch (computeISERangeMax(blockParams.weightISEParams))
2055     {
2056     case 1:
2057         r = 2;
2058         h = 0;
2059         break;
2060     case 2:
2061         r = 3;
2062         h = 0;
2063         break;
2064     case 3:
2065         r = 4;
2066         h = 0;
2067         break;
2068     case 4:
2069         r = 5;
2070         h = 0;
2071         break;
2072     case 5:
2073         r = 6;
2074         h = 0;
2075         break;
2076     case 7:
2077         r = 7;
2078         h = 0;
2079         break;
2080 
2081     case 9:
2082         r = 2;
2083         h = 1;
2084         break;
2085     case 11:
2086         r = 3;
2087         h = 1;
2088         break;
2089     case 15:
2090         r = 4;
2091         h = 1;
2092         break;
2093     case 19:
2094         r = 5;
2095         h = 1;
2096         break;
2097     case 23:
2098         r = 6;
2099         h = 1;
2100         break;
2101     case 31:
2102         r = 7;
2103         h = 1;
2104         break;
2105 
2106     default:
2107         DE_ASSERT(false);
2108         r = (uint32_t)-1;
2109         h = (uint32_t)-1;
2110     }
2111 
2112     // Find block mode layout index, i.e. appropriate row in the "2d block mode layout" table in ASTC spec.
2113 
2114     {
2115         enum BlockModeLayoutABVariable
2116         {
2117             Z = 0,
2118             A = 1,
2119             B = 2
2120         };
2121 
2122         static const struct BlockModeLayout
2123         {
2124             int aNumBits;
2125             int bNumBits;
2126             BlockModeLayoutABVariable gridWidthVariableTerm;
2127             int gridWidthConstantTerm;
2128             BlockModeLayoutABVariable gridHeightVariableTerm;
2129             int gridHeightConstantTerm;
2130         } blockModeLayouts[] = {{2, 2, B, 4, A, 2},  {2, 2, B, 8, A, 2},  {2, 2, A, 2, B, 8},  {2, 1, A, 2, B, 6},
2131                                 {2, 1, B, 2, A, 2},  {2, 0, Z, 12, A, 2}, {2, 0, A, 2, Z, 12}, {0, 0, Z, 6, Z, 10},
2132                                 {0, 0, Z, 10, Z, 6}, {2, 2, A, 6, B, 6}};
2133 
2134         for (blockModeLayoutNdx = 0; blockModeLayoutNdx < DE_LENGTH_OF_ARRAY(blockModeLayouts); blockModeLayoutNdx++)
2135         {
2136             const BlockModeLayout &layout   = blockModeLayouts[blockModeLayoutNdx];
2137             const int aMax                  = (1 << layout.aNumBits) - 1;
2138             const int bMax                  = (1 << layout.bNumBits) - 1;
2139             const int variableOffsetsMax[3] = {0, aMax, bMax};
2140             const int widthMin              = layout.gridWidthConstantTerm;
2141             const int heightMin             = layout.gridHeightConstantTerm;
2142             const int widthMax              = widthMin + variableOffsetsMax[layout.gridWidthVariableTerm];
2143             const int heightMax             = heightMin + variableOffsetsMax[layout.gridHeightVariableTerm];
2144 
2145             DE_ASSERT(layout.gridWidthVariableTerm != layout.gridHeightVariableTerm ||
2146                       layout.gridWidthVariableTerm == Z);
2147 
2148             if (de::inRange(blockParams.weightGridWidth, widthMin, widthMax) &&
2149                 de::inRange(blockParams.weightGridHeight, heightMin, heightMax))
2150             {
2151                 uint32_t defaultvalue    = 0;
2152                 uint32_t &widthVariable  = layout.gridWidthVariableTerm == A ? a :
2153                                            layout.gridWidthVariableTerm == B ? b :
2154                                                                                defaultvalue;
2155                 uint32_t &heightVariable = layout.gridHeightVariableTerm == A ? a :
2156                                            layout.gridHeightVariableTerm == B ? b :
2157                                                                                 defaultvalue;
2158 
2159                 widthVariable  = blockParams.weightGridWidth - layout.gridWidthConstantTerm;
2160                 heightVariable = blockParams.weightGridHeight - layout.gridHeightConstantTerm;
2161 
2162                 break;
2163             }
2164         }
2165     }
2166 
2167     // Set block mode bits.
2168 
2169     const uint32_t a0 = getBit(a, 0);
2170     const uint32_t a1 = getBit(a, 1);
2171     const uint32_t b0 = getBit(b, 0);
2172     const uint32_t b1 = getBit(b, 1);
2173     const uint32_t r0 = getBit(r, 0);
2174     const uint32_t r1 = getBit(r, 1);
2175     const uint32_t r2 = getBit(r, 2);
2176 
2177 #define SB(NDX, VAL) dst.setBit((NDX), (VAL))
2178 #define ASSIGN_BITS(B10, B9, B8, B7, B6, B5, B4, B3, B2, B1, B0) \
2179     do                                                           \
2180     {                                                            \
2181         SB(10, (B10));                                           \
2182         SB(9, (B9));                                             \
2183         SB(8, (B8));                                             \
2184         SB(7, (B7));                                             \
2185         SB(6, (B6));                                             \
2186         SB(5, (B5));                                             \
2187         SB(4, (B4));                                             \
2188         SB(3, (B3));                                             \
2189         SB(2, (B2));                                             \
2190         SB(1, (B1));                                             \
2191         SB(0, (B0));                                             \
2192     } while (false)
2193 
2194     switch (blockModeLayoutNdx)
2195     {
2196     case 0:
2197         ASSIGN_BITS(d, h, b1, b0, a1, a0, r0, 0, 0, r2, r1);
2198         break;
2199     case 1:
2200         ASSIGN_BITS(d, h, b1, b0, a1, a0, r0, 0, 1, r2, r1);
2201         break;
2202     case 2:
2203         ASSIGN_BITS(d, h, b1, b0, a1, a0, r0, 1, 0, r2, r1);
2204         break;
2205     case 3:
2206         ASSIGN_BITS(d, h, 0, b, a1, a0, r0, 1, 1, r2, r1);
2207         break;
2208     case 4:
2209         ASSIGN_BITS(d, h, 1, b, a1, a0, r0, 1, 1, r2, r1);
2210         break;
2211     case 5:
2212         ASSIGN_BITS(d, h, 0, 0, a1, a0, r0, r2, r1, 0, 0);
2213         break;
2214     case 6:
2215         ASSIGN_BITS(d, h, 0, 1, a1, a0, r0, r2, r1, 0, 0);
2216         break;
2217     case 7:
2218         ASSIGN_BITS(d, h, 1, 1, 0, 0, r0, r2, r1, 0, 0);
2219         break;
2220     case 8:
2221         ASSIGN_BITS(d, h, 1, 1, 0, 1, r0, r2, r1, 0, 0);
2222         break;
2223     case 9:
2224         ASSIGN_BITS(b1, b0, 1, 0, a1, a0, r0, r2, r1, 0, 0);
2225         DE_ASSERT(d == 0 && h == 0);
2226         break;
2227     default:
2228         DE_ASSERT(false);
2229     }
2230 
2231 #undef ASSIGN_BITS
2232 #undef SB
2233 }
2234 
2235 // Write color endpoint mode data of an ASTC block.
writeColorEndpointModes(AssignBlock128 & dst,const uint32_t * colorEndpointModes,bool isMultiPartSingleCemMode,int numPartitions,int extraCemBitsStart)2236 static void writeColorEndpointModes(AssignBlock128 &dst, const uint32_t *colorEndpointModes,
2237                                     bool isMultiPartSingleCemMode, int numPartitions, int extraCemBitsStart)
2238 {
2239     if (numPartitions == 1)
2240         dst.setBits(13, 16, colorEndpointModes[0]);
2241     else
2242     {
2243         if (isMultiPartSingleCemMode)
2244         {
2245             dst.setBits(23, 24, 0);
2246             dst.setBits(25, 28, colorEndpointModes[0]);
2247         }
2248         else
2249         {
2250             DE_ASSERT(numPartitions > 0);
2251             const uint32_t minCem      = *std::min_element(&colorEndpointModes[0], &colorEndpointModes[numPartitions]);
2252             const uint32_t maxCem      = *std::max_element(&colorEndpointModes[0], &colorEndpointModes[numPartitions]);
2253             const uint32_t minCemClass = minCem / 4;
2254             const uint32_t maxCemClass = maxCem / 4;
2255             DE_ASSERT(maxCemClass - minCemClass <= 1);
2256             DE_UNREF(minCemClass); // \note For non-debug builds.
2257             const uint32_t highLevelSelector = de::max(1u, maxCemClass);
2258 
2259             dst.setBits(23, 24, highLevelSelector);
2260 
2261             for (int partNdx = 0; partNdx < numPartitions; partNdx++)
2262             {
2263                 const uint32_t c           = colorEndpointModes[partNdx] / 4 == highLevelSelector ? 1 : 0;
2264                 const uint32_t m           = colorEndpointModes[partNdx] % 4;
2265                 const uint32_t lowMBit0Ndx = numPartitions + 2 * partNdx;
2266                 const uint32_t lowMBit1Ndx = numPartitions + 2 * partNdx + 1;
2267                 dst.setBit(25 + partNdx, c);
2268                 dst.setBit(lowMBit0Ndx < 4 ? 25 + lowMBit0Ndx : extraCemBitsStart + lowMBit0Ndx - 4, getBit(m, 0));
2269                 dst.setBit(lowMBit1Ndx < 4 ? 25 + lowMBit1Ndx : extraCemBitsStart + lowMBit1Ndx - 4, getBit(m, 1));
2270             }
2271         }
2272     }
2273 }
2274 
encodeISETritBlock(BitAssignAccessStream & dst,int numBits,bool fromExplicitInputBlock,const ISEInput::Block & blockInput,const uint32_t * nonBlockInput,int numValues)2275 static void encodeISETritBlock(BitAssignAccessStream &dst, int numBits, bool fromExplicitInputBlock,
2276                                const ISEInput::Block &blockInput, const uint32_t *nonBlockInput, int numValues)
2277 {
2278     // tritBlockTValue[t0][t1][t2][t3][t4] is a value of T (not necessarily the only one) that will yield the given trits when decoded.
2279     static const uint32_t tritBlockTValue[3][3][3][3][3] = {{{{{0, 128, 96}, {32, 160, 224}, {64, 192, 28}},
2280                                                               {{16, 144, 112}, {48, 176, 240}, {80, 208, 156}},
2281                                                               {{3, 131, 99}, {35, 163, 227}, {67, 195, 31}}},
2282                                                              {{{4, 132, 100}, {36, 164, 228}, {68, 196, 60}},
2283                                                               {{20, 148, 116}, {52, 180, 244}, {84, 212, 188}},
2284                                                               {{19, 147, 115}, {51, 179, 243}, {83, 211, 159}}},
2285                                                              {{{8, 136, 104}, {40, 168, 232}, {72, 200, 92}},
2286                                                               {{24, 152, 120}, {56, 184, 248}, {88, 216, 220}},
2287                                                               {{12, 140, 108}, {44, 172, 236}, {76, 204, 124}}}},
2288                                                             {{{{1, 129, 97}, {33, 161, 225}, {65, 193, 29}},
2289                                                               {{17, 145, 113}, {49, 177, 241}, {81, 209, 157}},
2290                                                               {{7, 135, 103}, {39, 167, 231}, {71, 199, 63}}},
2291                                                              {{{5, 133, 101}, {37, 165, 229}, {69, 197, 61}},
2292                                                               {{21, 149, 117}, {53, 181, 245}, {85, 213, 189}},
2293                                                               {{23, 151, 119}, {55, 183, 247}, {87, 215, 191}}},
2294                                                              {{{9, 137, 105}, {41, 169, 233}, {73, 201, 93}},
2295                                                               {{25, 153, 121}, {57, 185, 249}, {89, 217, 221}},
2296                                                               {{13, 141, 109}, {45, 173, 237}, {77, 205, 125}}}},
2297                                                             {{{{2, 130, 98}, {34, 162, 226}, {66, 194, 30}},
2298                                                               {{18, 146, 114}, {50, 178, 242}, {82, 210, 158}},
2299                                                               {{11, 139, 107}, {43, 171, 235}, {75, 203, 95}}},
2300                                                              {{{6, 134, 102}, {38, 166, 230}, {70, 198, 62}},
2301                                                               {{22, 150, 118}, {54, 182, 246}, {86, 214, 190}},
2302                                                               {{27, 155, 123}, {59, 187, 251}, {91, 219, 223}}},
2303                                                              {{{10, 138, 106}, {42, 170, 234}, {74, 202, 94}},
2304                                                               {{26, 154, 122}, {58, 186, 250}, {90, 218, 222}},
2305                                                               {{14, 142, 110}, {46, 174, 238}, {78, 206, 126}}}}};
2306 
2307     DE_ASSERT(de::inRange(numValues, 1, 5));
2308 
2309     uint32_t tritParts[5];
2310     uint32_t bitParts[5];
2311 
2312     for (int i = 0; i < 5; i++)
2313     {
2314         if (i < numValues)
2315         {
2316             if (fromExplicitInputBlock)
2317             {
2318                 bitParts[i]  = blockInput.bitValues[i];
2319                 tritParts[i] = -1; // \note Won't be used, but silences warning.
2320             }
2321             else
2322             {
2323                 // \todo [2016-01-20 pyry] numBits = 0 doesn't make sense
2324                 bitParts[i]  = numBits > 0 ? getBits(nonBlockInput[i], 0, numBits - 1) : 0;
2325                 tritParts[i] = nonBlockInput[i] >> numBits;
2326             }
2327         }
2328         else
2329         {
2330             bitParts[i]  = 0;
2331             tritParts[i] = 0;
2332         }
2333     }
2334 
2335     const uint32_t T = fromExplicitInputBlock ?
2336                            blockInput.tOrQValue :
2337                            tritBlockTValue[tritParts[0]][tritParts[1]][tritParts[2]][tritParts[3]][tritParts[4]];
2338 
2339     dst.setNext(numBits, bitParts[0]);
2340     dst.setNext(2, getBits(T, 0, 1));
2341     dst.setNext(numBits, bitParts[1]);
2342     dst.setNext(2, getBits(T, 2, 3));
2343     dst.setNext(numBits, bitParts[2]);
2344     dst.setNext(1, getBit(T, 4));
2345     dst.setNext(numBits, bitParts[3]);
2346     dst.setNext(2, getBits(T, 5, 6));
2347     dst.setNext(numBits, bitParts[4]);
2348     dst.setNext(1, getBit(T, 7));
2349 }
2350 
encodeISEQuintBlock(BitAssignAccessStream & dst,int numBits,bool fromExplicitInputBlock,const ISEInput::Block & blockInput,const uint32_t * nonBlockInput,int numValues)2351 static void encodeISEQuintBlock(BitAssignAccessStream &dst, int numBits, bool fromExplicitInputBlock,
2352                                 const ISEInput::Block &blockInput, const uint32_t *nonBlockInput, int numValues)
2353 {
2354     // quintBlockQValue[q0][q1][q2] is a value of Q (not necessarily the only one) that will yield the given quints when decoded.
2355     static const uint32_t quintBlockQValue[5][5][5] = {{{0, 32, 64, 96, 102},
2356                                                         {8, 40, 72, 104, 110},
2357                                                         {16, 48, 80, 112, 118},
2358                                                         {24, 56, 88, 120, 126},
2359                                                         {5, 37, 69, 101, 39}},
2360                                                        {{1, 33, 65, 97, 103},
2361                                                         {9, 41, 73, 105, 111},
2362                                                         {17, 49, 81, 113, 119},
2363                                                         {25, 57, 89, 121, 127},
2364                                                         {13, 45, 77, 109, 47}},
2365                                                        {{2, 34, 66, 98, 70},
2366                                                         {10, 42, 74, 106, 78},
2367                                                         {18, 50, 82, 114, 86},
2368                                                         {26, 58, 90, 122, 94},
2369                                                         {21, 53, 85, 117, 55}},
2370                                                        {{3, 35, 67, 99, 71},
2371                                                         {11, 43, 75, 107, 79},
2372                                                         {19, 51, 83, 115, 87},
2373                                                         {27, 59, 91, 123, 95},
2374                                                         {29, 61, 93, 125, 63}},
2375                                                        {{4, 36, 68, 100, 38},
2376                                                         {12, 44, 76, 108, 46},
2377                                                         {20, 52, 84, 116, 54},
2378                                                         {28, 60, 92, 124, 62},
2379                                                         {6, 14, 22, 30, 7}}};
2380 
2381     DE_ASSERT(de::inRange(numValues, 1, 3));
2382 
2383     uint32_t quintParts[3];
2384     uint32_t bitParts[3];
2385 
2386     for (int i = 0; i < 3; i++)
2387     {
2388         if (i < numValues)
2389         {
2390             if (fromExplicitInputBlock)
2391             {
2392                 bitParts[i]   = blockInput.bitValues[i];
2393                 quintParts[i] = -1; // \note Won't be used, but silences warning.
2394             }
2395             else
2396             {
2397                 // \todo [2016-01-20 pyry] numBits = 0 doesn't make sense
2398                 bitParts[i]   = numBits > 0 ? getBits(nonBlockInput[i], 0, numBits - 1) : 0;
2399                 quintParts[i] = nonBlockInput[i] >> numBits;
2400             }
2401         }
2402         else
2403         {
2404             bitParts[i]   = 0;
2405             quintParts[i] = 0;
2406         }
2407     }
2408 
2409     const uint32_t Q =
2410         fromExplicitInputBlock ? blockInput.tOrQValue : quintBlockQValue[quintParts[0]][quintParts[1]][quintParts[2]];
2411 
2412     dst.setNext(numBits, bitParts[0]);
2413     dst.setNext(3, getBits(Q, 0, 2));
2414     dst.setNext(numBits, bitParts[1]);
2415     dst.setNext(2, getBits(Q, 3, 4));
2416     dst.setNext(numBits, bitParts[2]);
2417     dst.setNext(2, getBits(Q, 5, 6));
2418 }
2419 
encodeISEBitBlock(BitAssignAccessStream & dst,int numBits,uint32_t value)2420 static void encodeISEBitBlock(BitAssignAccessStream &dst, int numBits, uint32_t value)
2421 {
2422     DE_ASSERT(de::inRange(value, 0u, (1u << numBits) - 1));
2423     dst.setNext(numBits, value);
2424 }
2425 
encodeISE(BitAssignAccessStream & dst,const ISEParams & params,const ISEInput & input,int numValues)2426 static void encodeISE(BitAssignAccessStream &dst, const ISEParams &params, const ISEInput &input, int numValues)
2427 {
2428     if (params.mode == ISEMODE_TRIT)
2429     {
2430         const int numBlocks = deDivRoundUp32(numValues, 5);
2431         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2432         {
2433             const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 5 * (numBlocks - 1) : 5;
2434             encodeISETritBlock(dst, params.numBits, input.isGivenInBlockForm,
2435                                input.isGivenInBlockForm ? input.value.block[blockNdx] : ISEInput::Block(),
2436                                input.isGivenInBlockForm ? DE_NULL : &input.value.plain[5 * blockNdx], numValuesInBlock);
2437         }
2438     }
2439     else if (params.mode == ISEMODE_QUINT)
2440     {
2441         const int numBlocks = deDivRoundUp32(numValues, 3);
2442         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2443         {
2444             const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 3 * (numBlocks - 1) : 3;
2445             encodeISEQuintBlock(dst, params.numBits, input.isGivenInBlockForm,
2446                                 input.isGivenInBlockForm ? input.value.block[blockNdx] : ISEInput::Block(),
2447                                 input.isGivenInBlockForm ? DE_NULL : &input.value.plain[3 * blockNdx],
2448                                 numValuesInBlock);
2449         }
2450     }
2451     else
2452     {
2453         DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
2454         for (int i = 0; i < numValues; i++)
2455             encodeISEBitBlock(dst, params.numBits,
2456                               input.isGivenInBlockForm ? input.value.block[i].bitValues[0] : input.value.plain[i]);
2457     }
2458 }
2459 
writeWeightData(AssignBlock128 & dst,const ISEParams & iseParams,const ISEInput & input,int numWeights)2460 static void writeWeightData(AssignBlock128 &dst, const ISEParams &iseParams, const ISEInput &input, int numWeights)
2461 {
2462     const int numWeightBits = computeNumRequiredBits(iseParams, numWeights);
2463     BitAssignAccessStream access(dst, 127, numWeightBits, false);
2464     encodeISE(access, iseParams, input, numWeights);
2465 }
2466 
writeColorEndpointData(AssignBlock128 & dst,const ISEParams & iseParams,const ISEInput & input,int numEndpoints,int numBitsForColorEndpoints,int colorEndpointDataStartNdx)2467 static void writeColorEndpointData(AssignBlock128 &dst, const ISEParams &iseParams, const ISEInput &input,
2468                                    int numEndpoints, int numBitsForColorEndpoints, int colorEndpointDataStartNdx)
2469 {
2470     BitAssignAccessStream access(dst, colorEndpointDataStartNdx, numBitsForColorEndpoints, true);
2471     encodeISE(access, iseParams, input, numEndpoints);
2472 }
2473 
generateNormalBlock(const NormalBlockParams & blockParams,int blockWidth,int blockHeight,const NormalBlockISEInputs & iseInputs)2474 static AssignBlock128 generateNormalBlock(const NormalBlockParams &blockParams, int blockWidth, int blockHeight,
2475                                           const NormalBlockISEInputs &iseInputs)
2476 {
2477     DE_ASSERT(isValidBlockParams(blockParams, blockWidth, blockHeight));
2478     DE_UNREF(blockWidth);  // \note For non-debug builds.
2479     DE_UNREF(blockHeight); // \note For non-debug builds.
2480 
2481     AssignBlock128 block;
2482     const int numWeights    = computeNumWeights(blockParams);
2483     const int numWeightBits = computeNumRequiredBits(blockParams.weightISEParams, numWeights);
2484 
2485     writeBlockMode(block, blockParams);
2486 
2487     block.setBits(11, 12, blockParams.numPartitions - 1);
2488     if (blockParams.numPartitions > 1)
2489         block.setBits(13, 22, blockParams.partitionSeed);
2490 
2491     {
2492         const int extraCemBitsStart = 127 - numWeightBits -
2493                                       (blockParams.numPartitions == 1 || blockParams.isMultiPartSingleCemMode ? -1 :
2494                                        blockParams.numPartitions == 4                                         ? 7 :
2495                                        blockParams.numPartitions == 3                                         ? 4 :
2496                                        blockParams.numPartitions == 2                                         ? 1 :
2497                                                                                                                 0);
2498 
2499         writeColorEndpointModes(block, &blockParams.colorEndpointModes[0], blockParams.isMultiPartSingleCemMode,
2500                                 blockParams.numPartitions, extraCemBitsStart);
2501 
2502         if (blockParams.isDualPlane)
2503             block.setBits(extraCemBitsStart - 2, extraCemBitsStart - 1, blockParams.ccs);
2504     }
2505 
2506     writeWeightData(block, blockParams.weightISEParams, iseInputs.weight, numWeights);
2507 
2508     {
2509         const int numColorEndpointValues = computeNumColorEndpointValues(
2510             &blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2511         const int numBitsForColorEndpoints  = computeNumBitsForColorEndpoints(blockParams);
2512         const int colorEndpointDataStartNdx = blockParams.numPartitions == 1 ? 17 : 29;
2513         const ISEParams &colorEndpointISEParams =
2514             computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues);
2515 
2516         writeColorEndpointData(block, colorEndpointISEParams, iseInputs.endpoint, numColorEndpointValues,
2517                                numBitsForColorEndpoints, colorEndpointDataStartNdx);
2518     }
2519 
2520     return block;
2521 }
2522 
2523 // Generate default ISE inputs for weight and endpoint data - gradient-ish values.
generateDefaultISEInputs(const NormalBlockParams & blockParams)2524 static NormalBlockISEInputs generateDefaultISEInputs(const NormalBlockParams &blockParams)
2525 {
2526     NormalBlockISEInputs result;
2527 
2528     {
2529         result.weight.isGivenInBlockForm = false;
2530 
2531         const int numWeights     = computeNumWeights(blockParams);
2532         const int weightRangeMax = computeISERangeMax(blockParams.weightISEParams);
2533 
2534         if (blockParams.isDualPlane)
2535         {
2536             for (int i = 0; i < numWeights; i += 2)
2537                 result.weight.value.plain[i] = (i * weightRangeMax + (numWeights - 1) / 2) / (numWeights - 1);
2538 
2539             for (int i = 1; i < numWeights; i += 2)
2540                 result.weight.value.plain[i] =
2541                     weightRangeMax - (i * weightRangeMax + (numWeights - 1) / 2) / (numWeights - 1);
2542         }
2543         else
2544         {
2545             for (int i = 0; i < numWeights; i++)
2546                 result.weight.value.plain[i] = (i * weightRangeMax + (numWeights - 1) / 2) / (numWeights - 1);
2547         }
2548     }
2549 
2550     {
2551         result.endpoint.isGivenInBlockForm = false;
2552 
2553         const int numColorEndpointValues = computeNumColorEndpointValues(
2554             &blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2555         const int numBitsForColorEndpoints = computeNumBitsForColorEndpoints(blockParams);
2556         const ISEParams &colorEndpointISEParams =
2557             computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues);
2558         const int colorEndpointRangeMax = computeISERangeMax(colorEndpointISEParams);
2559 
2560         for (int i = 0; i < numColorEndpointValues; i++)
2561             result.endpoint.value.plain[i] =
2562                 (i * colorEndpointRangeMax + (numColorEndpointValues - 1) / 2) / (numColorEndpointValues - 1);
2563     }
2564 
2565     return result;
2566 }
2567 
2568 static const ISEParams s_weightISEParamsCandidates[] = {
2569     ISEParams(ISEMODE_PLAIN_BIT, 1), ISEParams(ISEMODE_TRIT, 0), ISEParams(ISEMODE_PLAIN_BIT, 2),
2570     ISEParams(ISEMODE_QUINT, 0),     ISEParams(ISEMODE_TRIT, 1), ISEParams(ISEMODE_PLAIN_BIT, 3),
2571     ISEParams(ISEMODE_QUINT, 1),     ISEParams(ISEMODE_TRIT, 2), ISEParams(ISEMODE_PLAIN_BIT, 4),
2572     ISEParams(ISEMODE_QUINT, 2),     ISEParams(ISEMODE_TRIT, 3), ISEParams(ISEMODE_PLAIN_BIT, 5)};
2573 
generateRandomBlock(uint8_t * dst,const IVec3 & blockSize,de::Random & rnd)2574 void generateRandomBlock(uint8_t *dst, const IVec3 &blockSize, de::Random &rnd)
2575 {
2576     DE_ASSERT(blockSize.z() == 1);
2577 
2578     if (rnd.getFloat() < 0.1f)
2579     {
2580         // Void extent block.
2581         const bool isVoidExtentHDR = rnd.getBool();
2582         const uint16_t r = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
2583         const uint16_t g = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
2584         const uint16_t b = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
2585         const uint16_t a = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
2586         generateVoidExtentBlock(VoidExtentParams(isVoidExtentHDR, r, g, b, a)).assignToMemory(dst);
2587     }
2588     else
2589     {
2590         // Not void extent block.
2591 
2592         // Generate block params.
2593 
2594         NormalBlockParams blockParams;
2595 
2596         do
2597         {
2598             blockParams.weightGridWidth  = rnd.getInt(2, blockSize.x());
2599             blockParams.weightGridHeight = rnd.getInt(2, blockSize.y());
2600             blockParams.weightISEParams =
2601                 s_weightISEParamsCandidates[rnd.getInt(0, DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates) - 1)];
2602             blockParams.numPartitions            = rnd.getInt(1, 4);
2603             blockParams.isMultiPartSingleCemMode = rnd.getFloat() < 0.25f;
2604             blockParams.isDualPlane              = blockParams.numPartitions != 4 && rnd.getBool();
2605             blockParams.ccs                      = rnd.getInt(0, 3);
2606             blockParams.partitionSeed            = rnd.getInt(0, 1023);
2607 
2608             blockParams.colorEndpointModes[0] = rnd.getInt(0, 15);
2609 
2610             {
2611                 const int cemDiff = blockParams.isMultiPartSingleCemMode    ? 0 :
2612                                     blockParams.colorEndpointModes[0] == 0  ? 1 :
2613                                     blockParams.colorEndpointModes[0] == 15 ? -1 :
2614                                     rnd.getBool()                           ? 1 :
2615                                                                               -1;
2616 
2617                 for (int i = 1; i < blockParams.numPartitions; i++)
2618                     blockParams.colorEndpointModes[i] =
2619                         blockParams.colorEndpointModes[0] + (cemDiff == -1 ? rnd.getInt(-1, 0) :
2620                                                              cemDiff == 1  ? rnd.getInt(0, 1) :
2621                                                                              0);
2622             }
2623         } while (!isValidBlockParams(blockParams, blockSize.x(), blockSize.y()));
2624 
2625         // Generate ISE inputs for both weight and endpoint data.
2626 
2627         NormalBlockISEInputs iseInputs;
2628 
2629         for (int weightOrEndpoints = 0; weightOrEndpoints <= 1; weightOrEndpoints++)
2630         {
2631             const bool setWeights = weightOrEndpoints == 0;
2632             const int numValues   = setWeights ? computeNumWeights(blockParams) :
2633                                                  computeNumColorEndpointValues(&blockParams.colorEndpointModes[0],
2634                                                                                blockParams.numPartitions,
2635                                                                                blockParams.isMultiPartSingleCemMode);
2636             const ISEParams iseParams =
2637                 setWeights ? blockParams.weightISEParams :
2638                              computeMaximumRangeISEParams(computeNumBitsForColorEndpoints(blockParams), numValues);
2639             ISEInput &iseInput = setWeights ? iseInputs.weight : iseInputs.endpoint;
2640 
2641             iseInput.isGivenInBlockForm = rnd.getBool();
2642 
2643             if (iseInput.isGivenInBlockForm)
2644             {
2645                 const int numValuesPerISEBlock = iseParams.mode == ISEMODE_TRIT  ? 5 :
2646                                                  iseParams.mode == ISEMODE_QUINT ? 3 :
2647                                                                                    1;
2648                 const int iseBitMax            = (1 << iseParams.numBits) - 1;
2649                 const int numISEBlocks         = deDivRoundUp32(numValues, numValuesPerISEBlock);
2650 
2651                 for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocks; iseBlockNdx++)
2652                 {
2653                     iseInput.value.block[iseBlockNdx].tOrQValue = rnd.getInt(0, 255);
2654                     for (int i = 0; i < numValuesPerISEBlock; i++)
2655                         iseInput.value.block[iseBlockNdx].bitValues[i] = rnd.getInt(0, iseBitMax);
2656                 }
2657             }
2658             else
2659             {
2660                 const int rangeMax = computeISERangeMax(iseParams);
2661 
2662                 for (int valueNdx = 0; valueNdx < numValues; valueNdx++)
2663                     iseInput.value.plain[valueNdx] = rnd.getInt(0, rangeMax);
2664             }
2665         }
2666 
2667         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).assignToMemory(dst);
2668     }
2669 }
2670 
2671 } // namespace
2672 
2673 // Generate block data for a given BlockTestType and format.
generateBlockCaseTestData(vector<uint8_t> & dst,CompressedTexFormat format,BlockTestType testType)2674 void generateBlockCaseTestData(vector<uint8_t> &dst, CompressedTexFormat format, BlockTestType testType)
2675 {
2676     DE_ASSERT(isAstcFormat(format));
2677     DE_ASSERT(!(isAstcSRGBFormat(format) && isBlockTestTypeHDROnly(testType)));
2678 
2679     const IVec3 blockSize = getBlockPixelSize(format);
2680     DE_ASSERT(blockSize.z() == 1);
2681 
2682     switch (testType)
2683     {
2684     case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
2685         // Generate a gradient-like set of LDR void-extent blocks.
2686         {
2687             const int numBlocks      = 1 << 13;
2688             const uint32_t numValues = 1 << 16;
2689             dst.reserve(numBlocks * BLOCK_SIZE_BYTES);
2690 
2691             for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2692             {
2693                 const uint32_t baseValue = blockNdx * (numValues - 1) / (numBlocks - 1);
2694                 const uint16_t r         = (uint16_t)((baseValue + numValues * 0 / 4) % numValues);
2695                 const uint16_t g         = (uint16_t)((baseValue + numValues * 1 / 4) % numValues);
2696                 const uint16_t b         = (uint16_t)((baseValue + numValues * 2 / 4) % numValues);
2697                 const uint16_t a         = (uint16_t)((baseValue + numValues * 3 / 4) % numValues);
2698                 AssignBlock128 block;
2699 
2700                 generateVoidExtentBlock(VoidExtentParams(false, r, g, b, a)).pushBytesToVector(dst);
2701             }
2702 
2703             break;
2704         }
2705 
2706     case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
2707         // Generate a gradient-like set of HDR void-extent blocks, with values ranging from the largest finite negative to largest finite positive of fp16.
2708         {
2709             const float minValue = -65504.0f;
2710             const float maxValue = +65504.0f;
2711             const int numBlocks  = 1 << 13;
2712             dst.reserve(numBlocks * BLOCK_SIZE_BYTES);
2713 
2714             for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2715             {
2716                 const int rNdx = (blockNdx + numBlocks * 0 / 4) % numBlocks;
2717                 const int gNdx = (blockNdx + numBlocks * 1 / 4) % numBlocks;
2718                 const int bNdx = (blockNdx + numBlocks * 2 / 4) % numBlocks;
2719                 const int aNdx = (blockNdx + numBlocks * 3 / 4) % numBlocks;
2720                 const deFloat16 r =
2721                     deFloat32To16(minValue + (float)rNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2722                 const deFloat16 g =
2723                     deFloat32To16(minValue + (float)gNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2724                 const deFloat16 b =
2725                     deFloat32To16(minValue + (float)bNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2726                 const deFloat16 a =
2727                     deFloat32To16(minValue + (float)aNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2728 
2729                 generateVoidExtentBlock(VoidExtentParams(true, r, g, b, a)).pushBytesToVector(dst);
2730             }
2731 
2732             break;
2733         }
2734 
2735     case BLOCK_TEST_TYPE_WEIGHT_GRID:
2736         // Generate different combinations of plane count, weight ISE params, and grid size.
2737         {
2738             for (int isDualPlane = 0; isDualPlane <= 1; isDualPlane++)
2739                 for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);
2740                      iseParamsNdx++)
2741                     for (int weightGridWidth = 2; weightGridWidth <= 12; weightGridWidth++)
2742                         for (int weightGridHeight = 2; weightGridHeight <= 12; weightGridHeight++)
2743                         {
2744                             NormalBlockParams blockParams;
2745                             NormalBlockISEInputs iseInputs;
2746 
2747                             blockParams.weightGridWidth       = weightGridWidth;
2748                             blockParams.weightGridHeight      = weightGridHeight;
2749                             blockParams.isDualPlane           = isDualPlane != 0;
2750                             blockParams.weightISEParams       = s_weightISEParamsCandidates[iseParamsNdx];
2751                             blockParams.ccs                   = 0;
2752                             blockParams.numPartitions         = 1;
2753                             blockParams.colorEndpointModes[0] = 0;
2754 
2755                             if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2756                                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2757                                                     generateDefaultISEInputs(blockParams))
2758                                     .pushBytesToVector(dst);
2759                         }
2760 
2761             break;
2762         }
2763 
2764     case BLOCK_TEST_TYPE_WEIGHT_ISE:
2765         // For each weight ISE param set, generate blocks that cover:
2766         // - each single value of the ISE's range, at each position inside an ISE block
2767         // - for trit and quint ISEs, each single T or Q value of an ISE block
2768         {
2769             for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2770             {
2771                 const ISEParams &iseParams = s_weightISEParamsCandidates[iseParamsNdx];
2772                 NormalBlockParams blockParams;
2773 
2774                 blockParams.weightGridWidth  = 4;
2775                 blockParams.weightGridHeight = 4;
2776                 blockParams.weightISEParams  = iseParams;
2777                 blockParams.numPartitions    = 1;
2778                 blockParams.isDualPlane =
2779                     blockParams.weightGridWidth * blockParams.weightGridHeight < 24 ? true : false;
2780                 blockParams.ccs                   = 0;
2781                 blockParams.colorEndpointModes[0] = 0;
2782 
2783                 while (!isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2784                 {
2785                     blockParams.weightGridWidth--;
2786                     blockParams.weightGridHeight--;
2787                 }
2788 
2789                 const int numValuesInISEBlock = iseParams.mode == ISEMODE_TRIT  ? 5 :
2790                                                 iseParams.mode == ISEMODE_QUINT ? 3 :
2791                                                                                   1;
2792                 const int numWeights          = computeNumWeights(blockParams);
2793 
2794                 {
2795                     const int numWeightValues           = (int)computeISERangeMax(iseParams) + 1;
2796                     const int numBlocks                 = deDivRoundUp32(numWeightValues, numWeights);
2797                     NormalBlockISEInputs iseInputs      = generateDefaultISEInputs(blockParams);
2798                     iseInputs.weight.isGivenInBlockForm = false;
2799 
2800                     for (int offset = 0; offset < numValuesInISEBlock; offset++)
2801                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2802                         {
2803                             for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2804                                 iseInputs.weight.value.plain[weightNdx] =
2805                                     (blockNdx * numWeights + weightNdx + offset) % numWeightValues;
2806 
2807                             generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs)
2808                                 .pushBytesToVector(dst);
2809                         }
2810                 }
2811 
2812                 if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
2813                 {
2814                     NormalBlockISEInputs iseInputs      = generateDefaultISEInputs(blockParams);
2815                     iseInputs.weight.isGivenInBlockForm = true;
2816 
2817                     const int numTQValues          = 1 << (iseParams.mode == ISEMODE_TRIT ? 8 : 7);
2818                     const int numISEBlocksPerBlock = deDivRoundUp32(numWeights, numValuesInISEBlock);
2819                     const int numBlocks            = deDivRoundUp32(numTQValues, numISEBlocksPerBlock);
2820 
2821                     for (int offset = 0; offset < numValuesInISEBlock; offset++)
2822                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2823                         {
2824                             for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocksPerBlock; iseBlockNdx++)
2825                             {
2826                                 for (int i = 0; i < numValuesInISEBlock; i++)
2827                                     iseInputs.weight.value.block[iseBlockNdx].bitValues[i] = 0;
2828                                 iseInputs.weight.value.block[iseBlockNdx].tOrQValue =
2829                                     (blockNdx * numISEBlocksPerBlock + iseBlockNdx + offset) % numTQValues;
2830                             }
2831 
2832                             generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs)
2833                                 .pushBytesToVector(dst);
2834                         }
2835                 }
2836             }
2837 
2838             break;
2839         }
2840 
2841     case BLOCK_TEST_TYPE_CEMS:
2842         // For each plane count & partition count combination, generate all color endpoint mode combinations.
2843         {
2844             for (int isDualPlane = 0; isDualPlane <= 1; isDualPlane++)
2845                 for (int numPartitions = 1; numPartitions <= (isDualPlane != 0 ? 3 : 4); numPartitions++)
2846                 {
2847                     // Multi-partition, single-CEM mode.
2848                     if (numPartitions > 1)
2849                     {
2850                         for (uint32_t singleCem = 0; singleCem < 16; singleCem++)
2851                         {
2852                             NormalBlockParams blockParams;
2853                             blockParams.weightGridWidth          = 4;
2854                             blockParams.weightGridHeight         = 4;
2855                             blockParams.isDualPlane              = isDualPlane != 0;
2856                             blockParams.ccs                      = 0;
2857                             blockParams.numPartitions            = numPartitions;
2858                             blockParams.isMultiPartSingleCemMode = true;
2859                             blockParams.colorEndpointModes[0]    = singleCem;
2860                             blockParams.partitionSeed            = 634;
2861 
2862                             for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);
2863                                  iseParamsNdx++)
2864                             {
2865                                 blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2866                                 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2867                                 {
2868                                     generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2869                                                         generateDefaultISEInputs(blockParams))
2870                                         .pushBytesToVector(dst);
2871                                     break;
2872                                 }
2873                             }
2874                         }
2875                     }
2876 
2877                     // Separate-CEM mode.
2878                     for (uint32_t cem0 = 0; cem0 < 16; cem0++)
2879                         for (uint32_t cem1 = 0; cem1 < (numPartitions >= 2 ? 16u : 1u); cem1++)
2880                             for (uint32_t cem2 = 0; cem2 < (numPartitions >= 3 ? 16u : 1u); cem2++)
2881                                 for (uint32_t cem3 = 0; cem3 < (numPartitions >= 4 ? 16u : 1u); cem3++)
2882                                 {
2883                                     NormalBlockParams blockParams;
2884                                     blockParams.weightGridWidth          = 4;
2885                                     blockParams.weightGridHeight         = 4;
2886                                     blockParams.isDualPlane              = isDualPlane != 0;
2887                                     blockParams.ccs                      = 0;
2888                                     blockParams.numPartitions            = numPartitions;
2889                                     blockParams.isMultiPartSingleCemMode = false;
2890                                     blockParams.colorEndpointModes[0]    = cem0;
2891                                     blockParams.colorEndpointModes[1]    = cem1;
2892                                     blockParams.colorEndpointModes[2]    = cem2;
2893                                     blockParams.colorEndpointModes[3]    = cem3;
2894                                     blockParams.partitionSeed            = 634;
2895 
2896                                     {
2897                                         const uint32_t minCem =
2898                                             *std::min_element(&blockParams.colorEndpointModes[0],
2899                                                               &blockParams.colorEndpointModes[numPartitions]);
2900                                         const uint32_t maxCem =
2901                                             *std::max_element(&blockParams.colorEndpointModes[0],
2902                                                               &blockParams.colorEndpointModes[numPartitions]);
2903                                         const uint32_t minCemClass = minCem / 4;
2904                                         const uint32_t maxCemClass = maxCem / 4;
2905 
2906                                         if (maxCemClass - minCemClass > 1)
2907                                             continue;
2908                                     }
2909 
2910                                     for (int iseParamsNdx = 0;
2911                                          iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2912                                     {
2913                                         blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2914                                         if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2915                                         {
2916                                             generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2917                                                                 generateDefaultISEInputs(blockParams))
2918                                                 .pushBytesToVector(dst);
2919                                             break;
2920                                         }
2921                                     }
2922                                 }
2923                 }
2924 
2925             break;
2926         }
2927 
2928     case BLOCK_TEST_TYPE_PARTITION_SEED:
2929         // Test all partition seeds ("partition pattern indices").
2930         {
2931             for (int numPartitions = 2; numPartitions <= 4; numPartitions++)
2932                 for (uint32_t partitionSeed = 0; partitionSeed < 1 << 10; partitionSeed++)
2933                 {
2934                     NormalBlockParams blockParams;
2935                     blockParams.weightGridWidth          = 4;
2936                     blockParams.weightGridHeight         = 4;
2937                     blockParams.weightISEParams          = ISEParams(ISEMODE_PLAIN_BIT, 2);
2938                     blockParams.isDualPlane              = false;
2939                     blockParams.numPartitions            = numPartitions;
2940                     blockParams.isMultiPartSingleCemMode = true;
2941                     blockParams.colorEndpointModes[0]    = 0;
2942                     blockParams.partitionSeed            = partitionSeed;
2943 
2944                     generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2945                                         generateDefaultISEInputs(blockParams))
2946                         .pushBytesToVector(dst);
2947                 }
2948 
2949             break;
2950         }
2951 
2952     // \note Fall-through.
2953     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
2954     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
2955     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
2956         // For each endpoint mode, for each pair of components in the endpoint value, test 10x10 combinations of values for that pair.
2957         // \note Separate modes for HDR and mode 15 due to different color scales and biases.
2958         {
2959             for (uint32_t cem = 0; cem < 16; cem++)
2960             {
2961                 const bool isHDRCem = cem == 2 || cem == 3 || cem == 7 || cem == 11 || cem == 14 || cem == 15;
2962 
2963                 if ((testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR && isHDRCem) ||
2964                     (testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15 && (!isHDRCem || cem == 15)) ||
2965                     (testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15 && cem != 15))
2966                     continue;
2967 
2968                 NormalBlockParams blockParams;
2969                 blockParams.weightGridWidth       = 3;
2970                 blockParams.weightGridHeight      = 4;
2971                 blockParams.weightISEParams       = ISEParams(ISEMODE_PLAIN_BIT, 2);
2972                 blockParams.isDualPlane           = false;
2973                 blockParams.numPartitions         = 1;
2974                 blockParams.colorEndpointModes[0] = cem;
2975 
2976                 {
2977                     const int numBitsForEndpoints = computeNumBitsForColorEndpoints(blockParams);
2978                     const int numEndpointParts    = computeNumColorEndpointValues(cem);
2979                     const ISEParams endpointISE   = computeMaximumRangeISEParams(numBitsForEndpoints, numEndpointParts);
2980                     const int endpointISERangeMax = computeISERangeMax(endpointISE);
2981 
2982                     for (int endpointPartNdx0 = 0; endpointPartNdx0 < numEndpointParts; endpointPartNdx0++)
2983                         for (int endpointPartNdx1 = endpointPartNdx0 + 1; endpointPartNdx1 < numEndpointParts;
2984                              endpointPartNdx1++)
2985                         {
2986                             NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2987                             const int numEndpointValues    = de::min(10, endpointISERangeMax + 1);
2988 
2989                             for (int endpointValueNdx0 = 0; endpointValueNdx0 < numEndpointValues; endpointValueNdx0++)
2990                                 for (int endpointValueNdx1 = 0; endpointValueNdx1 < numEndpointValues;
2991                                      endpointValueNdx1++)
2992                                 {
2993                                     const int endpointValue0 =
2994                                         endpointValueNdx0 * endpointISERangeMax / (numEndpointValues - 1);
2995                                     const int endpointValue1 =
2996                                         endpointValueNdx1 * endpointISERangeMax / (numEndpointValues - 1);
2997 
2998                                     iseInputs.endpoint.value.plain[endpointPartNdx0] = endpointValue0;
2999                                     iseInputs.endpoint.value.plain[endpointPartNdx1] = endpointValue1;
3000 
3001                                     generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs)
3002                                         .pushBytesToVector(dst);
3003                                 }
3004                         }
3005                 }
3006             }
3007 
3008             break;
3009         }
3010 
3011     case BLOCK_TEST_TYPE_ENDPOINT_ISE:
3012         // Similar to BLOCK_TEST_TYPE_WEIGHT_ISE, see above.
3013         {
3014             static const uint32_t endpointRangeMaximums[] = {5, 9, 11, 19, 23, 39, 47, 79, 95, 159, 191};
3015 
3016             for (int endpointRangeNdx = 0; endpointRangeNdx < DE_LENGTH_OF_ARRAY(endpointRangeMaximums);
3017                  endpointRangeNdx++)
3018             {
3019                 bool validCaseGenerated = false;
3020 
3021                 for (int numPartitions = 1; !validCaseGenerated && numPartitions <= 4; numPartitions++)
3022                     for (int isDual = 0; !validCaseGenerated && isDual <= 1; isDual++)
3023                         for (int weightISEParamsNdx = 0;
3024                              !validCaseGenerated &&
3025                              weightISEParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);
3026                              weightISEParamsNdx++)
3027                             for (int weightGridWidth = 2; !validCaseGenerated && weightGridWidth <= 12;
3028                                  weightGridWidth++)
3029                                 for (int weightGridHeight = 2; !validCaseGenerated && weightGridHeight <= 12;
3030                                      weightGridHeight++)
3031                                 {
3032                                     NormalBlockParams blockParams;
3033                                     blockParams.weightGridWidth  = weightGridWidth;
3034                                     blockParams.weightGridHeight = weightGridHeight;
3035                                     blockParams.weightISEParams  = s_weightISEParamsCandidates[weightISEParamsNdx];
3036                                     blockParams.isDualPlane      = isDual != 0;
3037                                     blockParams.ccs              = 0;
3038                                     blockParams.numPartitions    = numPartitions;
3039                                     blockParams.isMultiPartSingleCemMode = true;
3040                                     blockParams.colorEndpointModes[0]    = 12;
3041                                     blockParams.partitionSeed            = 634;
3042 
3043                                     if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
3044                                     {
3045                                         const ISEParams endpointISEParams = computeMaximumRangeISEParams(
3046                                             computeNumBitsForColorEndpoints(blockParams),
3047                                             computeNumColorEndpointValues(&blockParams.colorEndpointModes[0],
3048                                                                           numPartitions, true));
3049 
3050                                         if (computeISERangeMax(endpointISEParams) ==
3051                                             endpointRangeMaximums[endpointRangeNdx])
3052                                         {
3053                                             validCaseGenerated = true;
3054 
3055                                             const int numColorEndpoints = computeNumColorEndpointValues(
3056                                                 &blockParams.colorEndpointModes[0], numPartitions,
3057                                                 blockParams.isMultiPartSingleCemMode);
3058                                             const int numValuesInISEBlock = endpointISEParams.mode == ISEMODE_TRIT ? 5 :
3059                                                                             endpointISEParams.mode == ISEMODE_QUINT ?
3060                                                                                                                      3 :
3061                                                                                                                      1;
3062 
3063                                             {
3064                                                 const int numColorEndpointValues =
3065                                                     (int)computeISERangeMax(endpointISEParams) + 1;
3066                                                 const int numBlocks =
3067                                                     deDivRoundUp32(numColorEndpointValues, numColorEndpoints);
3068                                                 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
3069                                                 iseInputs.endpoint.isGivenInBlockForm = false;
3070 
3071                                                 for (int offset = 0; offset < numValuesInISEBlock; offset++)
3072                                                     for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
3073                                                     {
3074                                                         for (int endpointNdx = 0; endpointNdx < numColorEndpoints;
3075                                                              endpointNdx++)
3076                                                             iseInputs.endpoint.value.plain[endpointNdx] =
3077                                                                 (blockNdx * numColorEndpoints + endpointNdx + offset) %
3078                                                                 numColorEndpointValues;
3079 
3080                                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
3081                                                                             iseInputs)
3082                                                             .pushBytesToVector(dst);
3083                                                     }
3084                                             }
3085 
3086                                             if (endpointISEParams.mode == ISEMODE_TRIT ||
3087                                                 endpointISEParams.mode == ISEMODE_QUINT)
3088                                             {
3089                                                 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
3090                                                 iseInputs.endpoint.isGivenInBlockForm = true;
3091 
3092                                                 const int numTQValues =
3093                                                     1 << (endpointISEParams.mode == ISEMODE_TRIT ? 8 : 7);
3094                                                 const int numISEBlocksPerBlock =
3095                                                     deDivRoundUp32(numColorEndpoints, numValuesInISEBlock);
3096                                                 const int numBlocks = deDivRoundUp32(numTQValues, numISEBlocksPerBlock);
3097 
3098                                                 for (int offset = 0; offset < numValuesInISEBlock; offset++)
3099                                                     for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
3100                                                     {
3101                                                         for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocksPerBlock;
3102                                                              iseBlockNdx++)
3103                                                         {
3104                                                             for (int i = 0; i < numValuesInISEBlock; i++)
3105                                                                 iseInputs.endpoint.value.block[iseBlockNdx]
3106                                                                     .bitValues[i] = 0;
3107                                                             iseInputs.endpoint.value.block[iseBlockNdx].tOrQValue =
3108                                                                 (blockNdx * numISEBlocksPerBlock + iseBlockNdx +
3109                                                                  offset) %
3110                                                                 numTQValues;
3111                                                         }
3112 
3113                                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
3114                                                                             iseInputs)
3115                                                             .pushBytesToVector(dst);
3116                                                     }
3117                                             }
3118                                         }
3119                                     }
3120                                 }
3121 
3122                 DE_ASSERT(validCaseGenerated);
3123             }
3124 
3125             break;
3126         }
3127 
3128     case BLOCK_TEST_TYPE_CCS:
3129         // For all partition counts, test all values of the CCS (color component selector).
3130         {
3131             for (int numPartitions = 1; numPartitions <= 3; numPartitions++)
3132                 for (uint32_t ccs = 0; ccs < 4; ccs++)
3133                 {
3134                     NormalBlockParams blockParams;
3135                     blockParams.weightGridWidth          = 3;
3136                     blockParams.weightGridHeight         = 3;
3137                     blockParams.weightISEParams          = ISEParams(ISEMODE_PLAIN_BIT, 2);
3138                     blockParams.isDualPlane              = true;
3139                     blockParams.ccs                      = ccs;
3140                     blockParams.numPartitions            = numPartitions;
3141                     blockParams.isMultiPartSingleCemMode = true;
3142                     blockParams.colorEndpointModes[0]    = 8;
3143                     blockParams.partitionSeed            = 634;
3144 
3145                     generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
3146                                         generateDefaultISEInputs(blockParams))
3147                         .pushBytesToVector(dst);
3148                 }
3149 
3150             break;
3151         }
3152 
3153     case BLOCK_TEST_TYPE_RANDOM:
3154         // Generate a number of random (including invalid) blocks.
3155         {
3156             const int numBlocks = 16384;
3157             const uint32_t seed = 1;
3158 
3159             dst.resize(numBlocks * BLOCK_SIZE_BYTES);
3160 
3161             generateRandomBlocks(&dst[0], numBlocks, format, seed);
3162 
3163             break;
3164         }
3165 
3166     default:
3167         DE_ASSERT(false);
3168     }
3169 }
3170 
generateRandomBlocks(uint8_t * dst,size_t numBlocks,CompressedTexFormat format,uint32_t seed)3171 void generateRandomBlocks(uint8_t *dst, size_t numBlocks, CompressedTexFormat format, uint32_t seed)
3172 {
3173     const IVec3 blockSize = getBlockPixelSize(format);
3174     de::Random rnd(seed);
3175     size_t numBlocksGenerated = 0;
3176 
3177     DE_ASSERT(isAstcFormat(format));
3178     DE_ASSERT(blockSize.z() == 1);
3179 
3180     for (numBlocksGenerated = 0; numBlocksGenerated < numBlocks; numBlocksGenerated++)
3181     {
3182         uint8_t *const curBlockPtr = dst + numBlocksGenerated * BLOCK_SIZE_BYTES;
3183 
3184         generateRandomBlock(curBlockPtr, blockSize, rnd);
3185     }
3186 }
3187 
generateRandomValidBlocks(uint8_t * dst,size_t numBlocks,CompressedTexFormat format,TexDecompressionParams::AstcMode mode,uint32_t seed)3188 void generateRandomValidBlocks(uint8_t *dst, size_t numBlocks, CompressedTexFormat format,
3189                                TexDecompressionParams::AstcMode mode, uint32_t seed)
3190 {
3191     const IVec3 blockSize = getBlockPixelSize(format);
3192     de::Random rnd(seed);
3193     size_t numBlocksGenerated = 0;
3194 
3195     DE_ASSERT(isAstcFormat(format));
3196     DE_ASSERT(blockSize.z() == 1);
3197 
3198     for (numBlocksGenerated = 0; numBlocksGenerated < numBlocks; numBlocksGenerated++)
3199     {
3200         uint8_t *const curBlockPtr = dst + numBlocksGenerated * BLOCK_SIZE_BYTES;
3201 
3202         do
3203         {
3204             generateRandomBlock(curBlockPtr, blockSize, rnd);
3205         } while (!isValidBlock(curBlockPtr, format, mode));
3206     }
3207 }
3208 
3209 // Generate a number of trivial blocks to fill unneeded space in a texture.
generateDefaultVoidExtentBlocks(uint8_t * dst,size_t numBlocks)3210 void generateDefaultVoidExtentBlocks(uint8_t *dst, size_t numBlocks)
3211 {
3212     AssignBlock128 block = generateVoidExtentBlock(VoidExtentParams(false, 0, 0, 0, 0));
3213     for (size_t ndx = 0; ndx < numBlocks; ndx++)
3214         block.assignToMemory(&dst[ndx * BLOCK_SIZE_BYTES]);
3215 }
3216 
generateDefaultNormalBlocks(uint8_t * dst,size_t numBlocks,int blockWidth,int blockHeight)3217 void generateDefaultNormalBlocks(uint8_t *dst, size_t numBlocks, int blockWidth, int blockHeight)
3218 {
3219     NormalBlockParams blockParams;
3220 
3221     blockParams.weightGridWidth       = 3;
3222     blockParams.weightGridHeight      = 3;
3223     blockParams.weightISEParams       = ISEParams(ISEMODE_PLAIN_BIT, 5);
3224     blockParams.isDualPlane           = false;
3225     blockParams.numPartitions         = 1;
3226     blockParams.colorEndpointModes[0] = 8;
3227 
3228     NormalBlockISEInputs iseInputs      = generateDefaultISEInputs(blockParams);
3229     iseInputs.weight.isGivenInBlockForm = false;
3230 
3231     const int numWeights     = computeNumWeights(blockParams);
3232     const int weightRangeMax = computeISERangeMax(blockParams.weightISEParams);
3233 
3234     for (size_t blockNdx = 0; blockNdx < numBlocks; blockNdx++)
3235     {
3236         for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
3237             iseInputs.weight.value.plain[weightNdx] =
3238                 (uint32_t)((blockNdx * numWeights + weightNdx) * weightRangeMax / (numBlocks * numWeights - 1));
3239 
3240         generateNormalBlock(blockParams, blockWidth, blockHeight, iseInputs)
3241             .assignToMemory(dst + blockNdx * BLOCK_SIZE_BYTES);
3242     }
3243 }
3244 
isValidBlock(const uint8_t * data,CompressedTexFormat format,TexDecompressionParams::AstcMode mode)3245 bool isValidBlock(const uint8_t *data, CompressedTexFormat format, TexDecompressionParams::AstcMode mode)
3246 {
3247     const tcu::IVec3 blockPixelSize = getBlockPixelSize(format);
3248     const bool isSRGB               = isAstcSRGBFormat(format);
3249     const bool isLDR                = isSRGB || mode == TexDecompressionParams::ASTCMODE_LDR;
3250 
3251     // sRGB is not supported in HDR mode
3252     DE_ASSERT(!(mode == TexDecompressionParams::ASTCMODE_HDR && isSRGB));
3253 
3254     union
3255     {
3256         uint8_t sRGB[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
3257         float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
3258     } tmpBuffer;
3259     const Block128 blockData(data);
3260     const DecompressResult result =
3261         decompressBlock((isSRGB ? (void *)&tmpBuffer.sRGB[0] : (void *)&tmpBuffer.linear[0]), blockData,
3262                         blockPixelSize.x(), blockPixelSize.y(), isSRGB, isLDR);
3263 
3264     return result == DECOMPRESS_RESULT_VALID_BLOCK;
3265 }
3266 
decompress(const PixelBufferAccess & dst,const uint8_t * data,CompressedTexFormat format,TexDecompressionParams::AstcMode mode)3267 void decompress(const PixelBufferAccess &dst, const uint8_t *data, CompressedTexFormat format,
3268                 TexDecompressionParams::AstcMode mode)
3269 {
3270     const bool isSRGBFormat = isAstcSRGBFormat(format);
3271 
3272 #if defined(DE_DEBUG)
3273     const tcu::IVec3 blockPixelSize = getBlockPixelSize(format);
3274 
3275     DE_ASSERT(dst.getWidth() == blockPixelSize.x() && dst.getHeight() == blockPixelSize.y() &&
3276               dst.getDepth() == blockPixelSize.z());
3277     DE_ASSERT(mode == TexDecompressionParams::ASTCMODE_LDR || mode == TexDecompressionParams::ASTCMODE_HDR);
3278 #endif
3279 
3280     // sRGB is not supported in HDR mode
3281     DE_ASSERT(!(mode == TexDecompressionParams::ASTCMODE_HDR && isSRGBFormat));
3282 
3283     decompress(dst, data, isSRGBFormat, isSRGBFormat || mode == TexDecompressionParams::ASTCMODE_LDR);
3284 }
3285 
getBlockTestTypeName(BlockTestType testType)3286 const char *getBlockTestTypeName(BlockTestType testType)
3287 {
3288     switch (testType)
3289     {
3290     case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
3291         return "void_extent_ldr";
3292     case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3293         return "void_extent_hdr";
3294     case BLOCK_TEST_TYPE_WEIGHT_GRID:
3295         return "weight_grid";
3296     case BLOCK_TEST_TYPE_WEIGHT_ISE:
3297         return "weight_ise";
3298     case BLOCK_TEST_TYPE_CEMS:
3299         return "color_endpoint_modes";
3300     case BLOCK_TEST_TYPE_PARTITION_SEED:
3301         return "partition_pattern_index";
3302     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
3303         return "endpoint_value_ldr";
3304     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
3305         return "endpoint_value_hdr_cem_not_15";
3306     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
3307         return "endpoint_value_hdr_cem_15";
3308     case BLOCK_TEST_TYPE_ENDPOINT_ISE:
3309         return "endpoint_ise";
3310     case BLOCK_TEST_TYPE_CCS:
3311         return "color_component_selector";
3312     case BLOCK_TEST_TYPE_RANDOM:
3313         return "random";
3314     default:
3315         DE_ASSERT(false);
3316         return DE_NULL;
3317     }
3318 }
3319 
getBlockTestTypeDescription(BlockTestType testType)3320 const char *getBlockTestTypeDescription(BlockTestType testType)
3321 {
3322     switch (testType)
3323     {
3324     case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
3325         return "Test void extent block, LDR mode";
3326     case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3327         return "Test void extent block, HDR mode";
3328     case BLOCK_TEST_TYPE_WEIGHT_GRID:
3329         return "Test combinations of plane count, weight integer sequence encoding parameters, and weight grid size";
3330     case BLOCK_TEST_TYPE_WEIGHT_ISE:
3331         return "Test different integer sequence encoding block values for weight grid";
3332     case BLOCK_TEST_TYPE_CEMS:
3333         return "Test different color endpoint mode combinations, combined with different plane and partition counts";
3334     case BLOCK_TEST_TYPE_PARTITION_SEED:
3335         return "Test different partition pattern indices";
3336     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
3337         return "Test various combinations of each pair of color endpoint values, for each LDR color endpoint mode";
3338     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
3339         return "Test various combinations of each pair of color endpoint values, for each HDR color endpoint mode "
3340                "other than mode 15";
3341     case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
3342         return "Test various combinations of each pair of color endpoint values, HDR color endpoint mode 15";
3343     case BLOCK_TEST_TYPE_ENDPOINT_ISE:
3344         return "Test different integer sequence encoding block values for color endpoints";
3345     case BLOCK_TEST_TYPE_CCS:
3346         return "Test color component selector, for different partition counts";
3347     case BLOCK_TEST_TYPE_RANDOM:
3348         return "Random block test";
3349     default:
3350         DE_ASSERT(false);
3351         return DE_NULL;
3352     }
3353 }
3354 
isBlockTestTypeHDROnly(BlockTestType testType)3355 bool isBlockTestTypeHDROnly(BlockTestType testType)
3356 {
3357     return testType == BLOCK_TEST_TYPE_VOID_EXTENT_HDR || testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15 ||
3358            testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15;
3359 }
3360 
getBlockTestTypeColorScale(BlockTestType testType)3361 Vec4 getBlockTestTypeColorScale(BlockTestType testType)
3362 {
3363     switch (testType)
3364     {
3365     case tcu::astc::BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3366         return Vec4(0.5f / 65504.0f);
3367     case tcu::astc::BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
3368         return Vec4(1.0f / 65504.0f, 1.0f / 65504.0f, 1.0f / 65504.0f, 1.0f);
3369     case tcu::astc::BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
3370         return Vec4(1.0f / 65504.0f);
3371     default:
3372         return Vec4(1.0f);
3373     }
3374 }
3375 
getBlockTestTypeColorBias(BlockTestType testType)3376 Vec4 getBlockTestTypeColorBias(BlockTestType testType)
3377 {
3378     switch (testType)
3379     {
3380     case tcu::astc::BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3381         return Vec4(0.5f);
3382     default:
3383         return Vec4(0.0f);
3384     }
3385 }
3386 
3387 } // namespace astc
3388 } // namespace tcu
3389