1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Tester Core
3 * ----------------------------------------
4 *
5 * Copyright 2016 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief ASTC Utilities.
22 *//*--------------------------------------------------------------------*/
23
24 #include "tcuAstcUtil.hpp"
25 #include "deFloat16.h"
26 #include "deRandom.hpp"
27 #include "deMeta.hpp"
28
29 #include <algorithm>
30
31 namespace tcu
32 {
33 namespace astc
34 {
35
36 using std::vector;
37
38 namespace
39 {
40
41 // Common utilities
42
// Upper limits for the block width and height used by this file.
enum
{
    MAX_BLOCK_WIDTH  = 12, //!< Largest block width.
    MAX_BLOCK_HEIGHT = 12  //!< Largest block height.
};
48
getBit(uint32_t src,int ndx)49 inline uint32_t getBit(uint32_t src, int ndx)
50 {
51 DE_ASSERT(de::inBounds(ndx, 0, 32));
52 return (src >> ndx) & 1;
53 }
54
getBits(uint32_t src,int low,int high)55 inline uint32_t getBits(uint32_t src, int low, int high)
56 {
57 const int numBits = (high - low) + 1;
58
59 DE_ASSERT(de::inRange(numBits, 1, 32));
60
61 if (numBits < 32)
62 return (uint32_t)((src >> low) & ((1u << numBits) - 1));
63 else
64 return (uint32_t)((src >> low) & 0xFFFFFFFFu);
65 }
66
isBitSet(uint32_t src,int ndx)67 inline bool isBitSet(uint32_t src, int ndx)
68 {
69 return getBit(src, ndx) != 0;
70 }
71
reverseBits(uint32_t src,int numBits)72 inline uint32_t reverseBits(uint32_t src, int numBits)
73 {
74 DE_ASSERT(de::inRange(numBits, 0, 32));
75 uint32_t result = 0;
76 for (int i = 0; i < numBits; i++)
77 result |= ((src >> i) & 1) << (numBits - 1 - i);
78 return result;
79 }
80
bitReplicationScale(uint32_t src,int numSrcBits,int numDstBits)81 inline uint32_t bitReplicationScale(uint32_t src, int numSrcBits, int numDstBits)
82 {
83 DE_ASSERT(numSrcBits <= numDstBits);
84 DE_ASSERT((src & ((1 << numSrcBits) - 1)) == src);
85 uint32_t dst = 0;
86 for (int shift = numDstBits - numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
87 dst |= shift >= 0 ? src << shift : src >> -shift;
88 return dst;
89 }
90
signExtend(int32_t src,int numSrcBits)91 inline int32_t signExtend(int32_t src, int numSrcBits)
92 {
93 DE_ASSERT(de::inRange(numSrcBits, 2, 31));
94 const bool negative = (src & (1 << (numSrcBits - 1))) != 0;
95 return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
96 }
97
isFloat16InfOrNan(deFloat16 v)98 inline bool isFloat16InfOrNan(deFloat16 v)
99 {
100 return getBits(v, 10, 14) == 31;
101 }
102
// Encoding variant of an integer sequence (ISE): each value is either a trit or a
// quint combined with plain bits, or plain bits only.
enum ISEMode
{
    ISEMODE_TRIT      = 0,
    ISEMODE_QUINT     = 1,
    ISEMODE_PLAIN_BIT = 2,

    ISEMODE_LAST
};
111
112 struct ISEParams
113 {
114 ISEMode mode;
115 int numBits;
116
ISEParamstcu::astc::__anone0fc7f7b0111::ISEParams117 ISEParams(ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_)
118 {
119 }
120 };
121
computeNumRequiredBits(const ISEParams & iseParams,int numValues)122 inline int computeNumRequiredBits(const ISEParams &iseParams, int numValues)
123 {
124 switch (iseParams.mode)
125 {
126 case ISEMODE_TRIT:
127 return deDivRoundUp32(numValues * 8, 5) + numValues * iseParams.numBits;
128 case ISEMODE_QUINT:
129 return deDivRoundUp32(numValues * 7, 3) + numValues * iseParams.numBits;
130 case ISEMODE_PLAIN_BIT:
131 return numValues * iseParams.numBits;
132 default:
133 DE_ASSERT(false);
134 return -1;
135 }
136 }
137
computeMaximumRangeISEParams(int numAvailableBits,int numValuesInSequence)138 ISEParams computeMaximumRangeISEParams(int numAvailableBits, int numValuesInSequence)
139 {
140 int curBitsForTritMode = 6;
141 int curBitsForQuintMode = 5;
142 int curBitsForPlainBitMode = 8;
143
144 while (true)
145 {
146 DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
147
148 const int tritRange = curBitsForTritMode > 0 ? (3 << curBitsForTritMode) - 1 : -1;
149 const int quintRange = curBitsForQuintMode > 0 ? (5 << curBitsForQuintMode) - 1 : -1;
150 const int plainBitRange = curBitsForPlainBitMode > 0 ? (1 << curBitsForPlainBitMode) - 1 : -1;
151 const int maxRange = de::max(de::max(tritRange, quintRange), plainBitRange);
152
153 if (maxRange == tritRange)
154 {
155 const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
156 if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
157 return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
158 curBitsForTritMode--;
159 }
160 else if (maxRange == quintRange)
161 {
162 const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
163 if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
164 return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
165 curBitsForQuintMode--;
166 }
167 else
168 {
169 const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
170 DE_ASSERT(maxRange == plainBitRange);
171 if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
172 return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
173 curBitsForPlainBitMode--;
174 }
175 }
176 }
177
computeNumColorEndpointValues(uint32_t endpointMode)178 inline int computeNumColorEndpointValues(uint32_t endpointMode)
179 {
180 DE_ASSERT(endpointMode < 16);
181 return (endpointMode / 4 + 1) * 2;
182 }
183
184 // Decompression utilities
185
// Outcome of decompressing a single block.
enum DecompressResult
{
    DECOMPRESS_RESULT_VALID_BLOCK = 0, //!< Decompressed valid block
    DECOMPRESS_RESULT_ERROR       = 1, //!< Encountered error while decompressing, error color written

    DECOMPRESS_RESULT_LAST
};
193
194 // A helper for getting bits from a 128-bit block.
195 class Block128
196 {
197 private:
198 typedef uint64_t Word;
199
200 enum
201 {
202 WORD_BYTES = sizeof(Word),
203 WORD_BITS = 8 * WORD_BYTES,
204 NUM_WORDS = 128 / WORD_BITS
205 };
206
207 DE_STATIC_ASSERT(128 % WORD_BITS == 0);
208
209 public:
Block128(const uint8_t * src)210 Block128(const uint8_t *src)
211 {
212 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
213 {
214 m_words[wordNdx] = 0;
215 for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
216 m_words[wordNdx] |= (Word)src[wordNdx * WORD_BYTES + byteNdx] << (8 * byteNdx);
217 }
218 }
219
getBit(int ndx) const220 uint32_t getBit(int ndx) const
221 {
222 DE_ASSERT(de::inBounds(ndx, 0, 128));
223 return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
224 }
225
getBits(int low,int high) const226 uint32_t getBits(int low, int high) const
227 {
228 DE_ASSERT(de::inBounds(low, 0, 128));
229 DE_ASSERT(de::inBounds(high, 0, 128));
230 DE_ASSERT(de::inRange(high - low + 1, 0, 32));
231
232 if (high - low + 1 == 0)
233 return 0;
234
235 const int word0Ndx = low / WORD_BITS;
236 const int word1Ndx = high / WORD_BITS;
237
238 // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
239
240 if (word0Ndx == word1Ndx)
241 return (uint32_t)((m_words[word0Ndx] & ((((Word)1 << high % WORD_BITS << 1) - 1))) >>
242 ((Word)low % WORD_BITS));
243 else
244 {
245 DE_ASSERT(word1Ndx == word0Ndx + 1);
246
247 return (uint32_t)(m_words[word0Ndx] >> (low % WORD_BITS)) |
248 (uint32_t)((m_words[word1Ndx] & (((Word)1 << high % WORD_BITS << 1) - 1))
249 << (high - low - high % WORD_BITS));
250 }
251 }
252
isBitSet(int ndx) const253 bool isBitSet(int ndx) const
254 {
255 DE_ASSERT(de::inBounds(ndx, 0, 128));
256 return getBit(ndx) != 0;
257 }
258
259 private:
260 Word m_words[NUM_WORDS];
261 };
262
263 // A helper for sequential access into a Block128.
264 class BitAccessStream
265 {
266 public:
BitAccessStream(const Block128 & src,int startNdxInSrc,int length,bool forward)267 BitAccessStream(const Block128 &src, int startNdxInSrc, int length, bool forward)
268 : m_src(src)
269 , m_startNdxInSrc(startNdxInSrc)
270 , m_length(length)
271 , m_forward(forward)
272 , m_ndx(0)
273 {
274 }
275
276 // Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
getNext(int num)277 uint32_t getNext(int num)
278 {
279 if (num == 0 || m_ndx >= m_length)
280 return 0;
281
282 const int end = m_ndx + num;
283 const int numBitsFromSrc = de::max(0, de::min(m_length, end) - m_ndx);
284 const int low = m_ndx;
285 const int high = m_ndx + numBitsFromSrc - 1;
286
287 m_ndx += num;
288
289 return m_forward ? m_src.getBits(m_startNdxInSrc + low, m_startNdxInSrc + high) :
290 reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc);
291 }
292
293 private:
294 const Block128 &m_src;
295 const int m_startNdxInSrc;
296 const int m_length;
297 const bool m_forward;
298
299 int m_ndx;
300 };
301
// One decoded value of an integer sequence.
struct ISEDecodedResult
{
    uint32_t m;  //!< Plain bits of the value.
    uint32_t tq; //!< Trit or quint value, depending on ISE mode.
    uint32_t v;  //!< Combined value; the decoders in this file store (tq << numBits) + m, or just m in plain-bit mode.
};
308
309 // Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
310 struct ASTCBlockMode
311 {
312 bool isError;
313 // \note Following fields only relevant if !isError.
314 bool isVoidExtent;
315 // \note Following fields only relevant if !isVoidExtent.
316 bool isDualPlane;
317 int weightGridWidth;
318 int weightGridHeight;
319 ISEParams weightISEParams;
320
ASTCBlockModetcu::astc::__anone0fc7f7b0111::ASTCBlockMode321 ASTCBlockMode(void)
322 : isError(true)
323 , isVoidExtent(true)
324 , isDualPlane(true)
325 , weightGridWidth(-1)
326 , weightGridHeight(-1)
327 , weightISEParams(ISEMODE_LAST, -1)
328 {
329 }
330 };
331
computeNumWeights(const ASTCBlockMode & mode)332 inline int computeNumWeights(const ASTCBlockMode &mode)
333 {
334 return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
335 }
336
337 struct ColorEndpointPair
338 {
339 UVec4 e0;
340 UVec4 e1;
341 };
342
// Two weights belonging to one texel.
// NOTE(review): presumably one weight per plane of a dual-plane block; confirm against the users of this struct.
struct TexelWeightPair
{
    uint32_t w[2];
};
347
getASTCBlockMode(uint32_t blockModeData)348 ASTCBlockMode getASTCBlockMode(uint32_t blockModeData)
349 {
350 ASTCBlockMode blockMode;
351 blockMode.isError = true; // \note Set to false later, if not error.
352
353 blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc;
354
355 if (!blockMode.isVoidExtent)
356 {
357 if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) ||
358 getBits(blockModeData, 0, 3) == 0)
359 return blockMode; // Invalid ("reserved").
360
361 uint32_t r = (uint32_t)-1; // \note Set in the following branches.
362
363 if (getBits(blockModeData, 0, 1) == 0)
364 {
365 const uint32_t r0 = getBit(blockModeData, 4);
366 const uint32_t r1 = getBit(blockModeData, 2);
367 const uint32_t r2 = getBit(blockModeData, 3);
368 const uint32_t i78 = getBits(blockModeData, 7, 8);
369
370 r = (r2 << 2) | (r1 << 1) | (r0 << 0);
371
372 if (i78 == 3)
373 {
374 const bool i5 = isBitSet(blockModeData, 5);
375 blockMode.weightGridWidth = i5 ? 10 : 6;
376 blockMode.weightGridHeight = i5 ? 6 : 10;
377 }
378 else
379 {
380 const uint32_t a = getBits(blockModeData, 5, 6);
381 switch (i78)
382 {
383 case 0:
384 blockMode.weightGridWidth = 12;
385 blockMode.weightGridHeight = a + 2;
386 break;
387 case 1:
388 blockMode.weightGridWidth = a + 2;
389 blockMode.weightGridHeight = 12;
390 break;
391 case 2:
392 blockMode.weightGridWidth = a + 6;
393 blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;
394 break;
395 default:
396 DE_ASSERT(false);
397 }
398 }
399 }
400 else
401 {
402 const uint32_t r0 = getBit(blockModeData, 4);
403 const uint32_t r1 = getBit(blockModeData, 0);
404 const uint32_t r2 = getBit(blockModeData, 1);
405 const uint32_t i23 = getBits(blockModeData, 2, 3);
406 const uint32_t a = getBits(blockModeData, 5, 6);
407
408 r = (r2 << 2) | (r1 << 1) | (r0 << 0);
409
410 if (i23 == 3)
411 {
412 const uint32_t b = getBit(blockModeData, 7);
413 const bool i8 = isBitSet(blockModeData, 8);
414 blockMode.weightGridWidth = i8 ? b + 2 : a + 2;
415 blockMode.weightGridHeight = i8 ? a + 2 : b + 6;
416 }
417 else
418 {
419 const uint32_t b = getBits(blockModeData, 7, 8);
420
421 switch (i23)
422 {
423 case 0:
424 blockMode.weightGridWidth = b + 4;
425 blockMode.weightGridHeight = a + 2;
426 break;
427 case 1:
428 blockMode.weightGridWidth = b + 8;
429 blockMode.weightGridHeight = a + 2;
430 break;
431 case 2:
432 blockMode.weightGridWidth = a + 2;
433 blockMode.weightGridHeight = b + 8;
434 break;
435 default:
436 DE_ASSERT(false);
437 }
438 }
439 }
440
441 const bool zeroDH = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2;
442 const bool h = zeroDH ? 0 : isBitSet(blockModeData, 9);
443 blockMode.isDualPlane = zeroDH ? 0 : isBitSet(blockModeData, 10);
444
445 {
446 ISEMode &m = blockMode.weightISEParams.mode;
447 int &b = blockMode.weightISEParams.numBits;
448 m = ISEMODE_PLAIN_BIT;
449 b = 0;
450
451 if (h)
452 {
453 switch (r)
454 {
455 case 2:
456 m = ISEMODE_QUINT;
457 b = 1;
458 break;
459 case 3:
460 m = ISEMODE_TRIT;
461 b = 2;
462 break;
463 case 4:
464 b = 4;
465 break;
466 case 5:
467 m = ISEMODE_QUINT;
468 b = 2;
469 break;
470 case 6:
471 m = ISEMODE_TRIT;
472 b = 3;
473 break;
474 case 7:
475 b = 5;
476 break;
477 default:
478 DE_ASSERT(false);
479 }
480 }
481 else
482 {
483 switch (r)
484 {
485 case 2:
486 b = 1;
487 break;
488 case 3:
489 m = ISEMODE_TRIT;
490 break;
491 case 4:
492 b = 2;
493 break;
494 case 5:
495 m = ISEMODE_QUINT;
496 break;
497 case 6:
498 m = ISEMODE_TRIT;
499 b = 1;
500 break;
501 case 7:
502 b = 3;
503 break;
504 default:
505 DE_ASSERT(false);
506 }
507 }
508 }
509 }
510
511 blockMode.isError = false;
512 return blockMode;
513 }
514
// Fills a blockWidth x blockHeight block at dst with the error color (1, 0, 1, 1).
// With isSRGB, dst receives four uint8_t components per texel (0xff, 0, 0xff, 0xff);
// otherwise it receives four floats per texel (1, 0, 1, 1).
inline void setASTCErrorColorBlock(void *dst, int blockWidth, int blockHeight, bool isSRGB)
{
    const int numTexels = blockWidth * blockHeight;

    if (isSRGB)
    {
        static const uint8_t errorColor[4] = {0xff, 0, 0xff, 0xff};
        uint8_t *out                       = (uint8_t *)dst;

        for (int texelNdx = 0; texelNdx < numTexels; texelNdx++)
            for (int c = 0; c < 4; c++)
                *out++ = errorColor[c];
    }
    else
    {
        static const float errorColor[4] = {1.0f, 0.0f, 1.0f, 1.0f};
        float *out                       = (float *)dst;

        for (int texelNdx = 0; texelNdx < numTexels; texelNdx++)
            for (int c = 0; c < 4; c++)
                *out++ = errorColor[c];
    }
}
542
decodeVoidExtentBlock(void * dst,const Block128 & blockData,int blockWidth,int blockHeight,bool isSRGB,bool isLDRMode)543 DecompressResult decodeVoidExtentBlock(void *dst, const Block128 &blockData, int blockWidth, int blockHeight,
544 bool isSRGB, bool isLDRMode)
545 {
546 const uint32_t minSExtent = blockData.getBits(12, 24);
547 const uint32_t maxSExtent = blockData.getBits(25, 37);
548 const uint32_t minTExtent = blockData.getBits(38, 50);
549 const uint32_t maxTExtent = blockData.getBits(51, 63);
550 const bool allExtentsAllOnes =
551 minSExtent == 0x1fff && maxSExtent == 0x1fff && minTExtent == 0x1fff && maxTExtent == 0x1fff;
552 const bool isHDRBlock = blockData.isBitSet(9);
553
554 if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent)))
555 {
556 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
557 return DECOMPRESS_RESULT_ERROR;
558 }
559
560 const uint32_t rgba[4] = {blockData.getBits(64, 79), blockData.getBits(80, 95), blockData.getBits(96, 111),
561 blockData.getBits(112, 127)};
562
563 if (isSRGB)
564 {
565 uint8_t *const dstU = (uint8_t *)dst;
566 for (int i = 0; i < blockWidth * blockHeight; i++)
567 for (int c = 0; c < 4; c++)
568 dstU[i * 4 + c] = (uint8_t)((rgba[c] & 0xff00) >> 8);
569 }
570 else
571 {
572 float *const dstF = (float *)dst;
573
574 if (isHDRBlock)
575 {
576 for (int c = 0; c < 4; c++)
577 {
578 if (isFloat16InfOrNan((deFloat16)rgba[c]))
579 throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture "
580 "(behavior undefined by ASTC specification)");
581 }
582
583 for (int i = 0; i < blockWidth * blockHeight; i++)
584 for (int c = 0; c < 4; c++)
585 dstF[i * 4 + c] = deFloat16To32((deFloat16)rgba[c]);
586 }
587 else
588 {
589 for (int i = 0; i < blockWidth * blockHeight; i++)
590 for (int c = 0; c < 4; c++)
591 dstF[i * 4 + c] = rgba[c] == 65535 ? 1.0f : (float)rgba[c] / 65536.0f;
592 }
593 }
594
595 return DECOMPRESS_RESULT_VALID_BLOCK;
596 }
597
decodeColorEndpointModes(uint32_t * endpointModesDst,const Block128 & blockData,int numPartitions,int extraCemBitsStart)598 void decodeColorEndpointModes(uint32_t *endpointModesDst, const Block128 &blockData, int numPartitions,
599 int extraCemBitsStart)
600 {
601 if (numPartitions == 1)
602 endpointModesDst[0] = blockData.getBits(13, 16);
603 else
604 {
605 const uint32_t highLevelSelector = blockData.getBits(23, 24);
606
607 if (highLevelSelector == 0)
608 {
609 const uint32_t mode = blockData.getBits(25, 28);
610 for (int i = 0; i < numPartitions; i++)
611 endpointModesDst[i] = mode;
612 }
613 else
614 {
615 for (int partNdx = 0; partNdx < numPartitions; partNdx++)
616 {
617 const uint32_t cemClass = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1);
618 const uint32_t lowBit0Ndx = numPartitions + 2 * partNdx;
619 const uint32_t lowBit1Ndx = numPartitions + 2 * partNdx + 1;
620 const uint32_t lowBit0 =
621 blockData.getBit(lowBit0Ndx < 4 ? 25 + lowBit0Ndx : extraCemBitsStart + lowBit0Ndx - 4);
622 const uint32_t lowBit1 =
623 blockData.getBit(lowBit1Ndx < 4 ? 25 + lowBit1Ndx : extraCemBitsStart + lowBit1Ndx - 4);
624
625 endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
626 }
627 }
628 }
629 }
630
computeNumColorEndpointValues(const uint32_t * endpointModes,int numPartitions)631 int computeNumColorEndpointValues(const uint32_t *endpointModes, int numPartitions)
632 {
633 int result = 0;
634 for (int i = 0; i < numPartitions; i++)
635 result += computeNumColorEndpointValues(endpointModes[i]);
636 return result;
637 }
638
decodeISETritBlock(ISEDecodedResult * dst,int numValues,BitAccessStream & data,int numBits)639 void decodeISETritBlock(ISEDecodedResult *dst, int numValues, BitAccessStream &data, int numBits)
640 {
641 DE_ASSERT(de::inRange(numValues, 1, 5));
642
643 uint32_t m[5];
644
645 m[0] = data.getNext(numBits);
646 uint32_t T01 = data.getNext(2);
647 m[1] = data.getNext(numBits);
648 uint32_t T23 = data.getNext(2);
649 m[2] = data.getNext(numBits);
650 uint32_t T4 = data.getNext(1);
651 m[3] = data.getNext(numBits);
652 uint32_t T56 = data.getNext(2);
653 m[4] = data.getNext(numBits);
654 uint32_t T7 = data.getNext(1);
655
656 switch (numValues)
657 {
658 case 1:
659 T23 = 0;
660 // Fallthrough
661 case 2:
662 T4 = 0;
663 // Fallthrough
664 case 3:
665 T56 = 0;
666 // Fallthrough
667 case 4:
668 T7 = 0;
669 // Fallthrough
670 case 5:
671 break;
672 default:
673 DE_ASSERT(false);
674 }
675
676 const uint32_t T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);
677
678 static const uint32_t tritsFromT[256][5] = {
679 {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0}, {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0},
680 {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0}, {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
681 {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0}, {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0},
682 {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0}, {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
683 {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0}, {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2},
684 {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2}, {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
685 {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0}, {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0},
686 {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0}, {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
687 {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0}, {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0},
688 {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0}, {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
689 {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2}, {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0},
690 {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0}, {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
691 {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0}, {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0},
692 {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0}, {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
693 {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0}, {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0},
694 {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0}, {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
695 {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2}, {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2},
696 {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2}, {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
697 {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2}, {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2},
698 {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2}, {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
699 {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2}, {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2},
700 {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2}, {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
701 {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1}, {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1},
702 {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1}, {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
703 {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1}, {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1},
704 {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1}, {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
705 {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2}, {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1},
706 {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1}, {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
707 {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1}, {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1},
708 {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1}, {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
709 {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1}, {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1},
710 {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1}, {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
711 {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1}, {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1},
712 {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1}, {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
713 {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1}, {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1},
714 {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1}, {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
715 {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1}, {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2},
716 {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2}, {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
717 {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2}, {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2},
718 {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2}, {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
719 {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2}, {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2},
720 {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2}, {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
721 {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2}};
722
723 const uint32_t(&trits)[5] = tritsFromT[T];
724
725 for (int i = 0; i < numValues; i++)
726 {
727 dst[i].m = m[i];
728 dst[i].tq = trits[i];
729 dst[i].v = (trits[i] << numBits) + m[i];
730 }
731 }
732
decodeISEQuintBlock(ISEDecodedResult * dst,int numValues,BitAccessStream & data,int numBits)733 void decodeISEQuintBlock(ISEDecodedResult *dst, int numValues, BitAccessStream &data, int numBits)
734 {
735 DE_ASSERT(de::inRange(numValues, 1, 3));
736
737 uint32_t m[3];
738
739 m[0] = data.getNext(numBits);
740 uint32_t Q012 = data.getNext(3);
741 m[1] = data.getNext(numBits);
742 uint32_t Q34 = data.getNext(2);
743 m[2] = data.getNext(numBits);
744 uint32_t Q56 = data.getNext(2);
745
746 switch (numValues)
747 {
748 case 1:
749 Q34 = 0;
750 // Fallthrough
751 case 2:
752 Q56 = 0;
753 // Fallthrough
754 case 3:
755 break;
756 default:
757 DE_ASSERT(false);
758 }
759
760 const uint32_t Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
761
762 static const uint32_t quintsFromQ[256][3] = {
763 {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4}, {0, 1, 0}, {1, 1, 0},
764 {2, 1, 0}, {3, 1, 0}, {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4}, {0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0},
765 {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4}, {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0},
766 {4, 4, 3}, {4, 4, 4}, {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1}, {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
767 {0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4}, {0, 2, 1}, {1, 2, 1},
768 {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4}, {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
769 {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4}, {0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2},
770 {2, 0, 4}, {3, 0, 4}, {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
771 {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2}, {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4}, {0, 3, 2}, {1, 3, 2},
772 {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4}, {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3},
773 {4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4}, {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3}, {4, 1, 3}, {1, 4, 3},
774 {0, 1, 4}, {1, 1, 4}, {0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
775 {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4}};
776
777 const uint32_t(&quints)[3] = quintsFromQ[Q];
778
779 for (int i = 0; i < numValues; i++)
780 {
781 dst[i].m = m[i];
782 dst[i].tq = quints[i];
783 dst[i].v = (quints[i] << numBits) + m[i];
784 }
785 }
786
decodeISEBitBlock(ISEDecodedResult * dst,BitAccessStream & data,int numBits)787 inline void decodeISEBitBlock(ISEDecodedResult *dst, BitAccessStream &data, int numBits)
788 {
789 dst[0].m = data.getNext(numBits);
790 dst[0].v = dst[0].m;
791 }
792
decodeISE(ISEDecodedResult * dst,int numValues,BitAccessStream & data,const ISEParams & params)793 void decodeISE(ISEDecodedResult *dst, int numValues, BitAccessStream &data, const ISEParams ¶ms)
794 {
795 if (params.mode == ISEMODE_TRIT)
796 {
797 const int numBlocks = deDivRoundUp32(numValues, 5);
798 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
799 {
800 const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 5 * (numBlocks - 1) : 5;
801 decodeISETritBlock(&dst[5 * blockNdx], numValuesInBlock, data, params.numBits);
802 }
803 }
804 else if (params.mode == ISEMODE_QUINT)
805 {
806 const int numBlocks = deDivRoundUp32(numValues, 3);
807 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
808 {
809 const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 3 * (numBlocks - 1) : 3;
810 decodeISEQuintBlock(&dst[3 * blockNdx], numValuesInBlock, data, params.numBits);
811 }
812 }
813 else
814 {
815 DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
816 for (int i = 0; i < numValues; i++)
817 decodeISEBitBlock(&dst[i], data, params.numBits);
818 }
819 }
820
unquantizeColorEndpoints(uint32_t * dst,const ISEDecodedResult * iseResults,int numEndpoints,const ISEParams & iseParams)821 void unquantizeColorEndpoints(uint32_t *dst, const ISEDecodedResult *iseResults, int numEndpoints,
822 const ISEParams &iseParams)
823 {
824 if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
825 {
826 const int rangeCase = iseParams.numBits * 2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
827 DE_ASSERT(de::inRange(rangeCase, 0, 10));
828 static const uint32_t Ca[11] = {204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5};
829 const uint32_t C = Ca[rangeCase];
830
831 for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
832 {
833 const uint32_t a = getBit(iseResults[endpointNdx].m, 0);
834 const uint32_t b = getBit(iseResults[endpointNdx].m, 1);
835 const uint32_t c = getBit(iseResults[endpointNdx].m, 2);
836 const uint32_t d = getBit(iseResults[endpointNdx].m, 3);
837 const uint32_t e = getBit(iseResults[endpointNdx].m, 4);
838 const uint32_t f = getBit(iseResults[endpointNdx].m, 5);
839
840 const uint32_t A = a == 0 ? 0 : (1 << 9) - 1;
841 const uint32_t B = rangeCase == 0 ? 0 :
842 rangeCase == 1 ? 0 :
843 rangeCase == 2 ? (b << 8) | (b << 4) | (b << 2) | (b << 1) :
844 rangeCase == 3 ? (b << 8) | (b << 3) | (b << 2) :
845 rangeCase == 4 ? (c << 8) | (b << 7) | (c << 3) | (b << 2) | (c << 1) | (b << 0) :
846 rangeCase == 5 ? (c << 8) | (b << 7) | (c << 2) | (b << 1) | (c << 0) :
847 rangeCase == 6 ? (d << 8) | (c << 7) | (b << 6) | (d << 2) | (c << 1) | (b << 0) :
848 rangeCase == 7 ? (d << 8) | (c << 7) | (b << 6) | (d << 1) | (c << 0) :
849 rangeCase == 8 ? (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 1) | (d << 0) :
850 rangeCase == 9 ? (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 0) :
851 rangeCase == 10 ? (f << 8) | (e << 7) | (d << 6) | (c << 5) | (b << 4) | (f << 0) :
852 (uint32_t)-1;
853 DE_ASSERT(B != (uint32_t)-1);
854
855 dst[endpointNdx] = (((iseResults[endpointNdx].tq * C + B) ^ A) >> 2) | (A & 0x80);
856 }
857 }
858 else
859 {
860 DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
861
862 for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
863 dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
864 }
865 }
866
bitTransferSigned(int32_t & a,int32_t & b)867 inline void bitTransferSigned(int32_t &a, int32_t &b)
868 {
869 b >>= 1;
870 b |= a & 0x80;
871 a >>= 1;
872 a &= 0x3f;
873 if (isBitSet(a, 5))
874 a -= 0x40;
875 }
876
clampedRGBA(const IVec4 & rgba)877 inline UVec4 clampedRGBA(const IVec4 &rgba)
878 {
879 return UVec4(de::clamp(rgba.x(), 0, 0xff), de::clamp(rgba.y(), 0, 0xff), de::clamp(rgba.z(), 0, 0xff),
880 de::clamp(rgba.w(), 0, 0xff));
881 }
882
blueContract(int r,int g,int b,int a)883 inline IVec4 blueContract(int r, int g, int b, int a)
884 {
885 return IVec4((r + b) >> 1, (g + b) >> 1, b, a);
886 }
887
// Returns true for the HDR color endpoint modes: 2, 3, 7, 11, 14 and 15.
inline bool isColorEndpointModeHDR(uint32_t mode)
{
    switch (mode)
    {
    case 2:
    case 3:
    case 7:
    case 11:
    case 14:
    case 15:
        return true;
    default:
        return false;
    }
}
892
decodeHDREndpointMode7(UVec4 & e0,UVec4 & e1,uint32_t v0,uint32_t v1,uint32_t v2,uint32_t v3)893 void decodeHDREndpointMode7(UVec4 &e0, UVec4 &e1, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3)
894 {
895 const uint32_t m10 = getBit(v1, 7) | (getBit(v2, 7) << 1);
896 const uint32_t m23 = getBits(v0, 6, 7);
897 const uint32_t majComp = m10 != 3 ? m10 : m23 != 3 ? m23 : 0;
898 const uint32_t mode = m10 != 3 ? m23 : m23 != 3 ? 4 : 5;
899
900 int32_t red = (int32_t)getBits(v0, 0, 5);
901 int32_t green = (int32_t)getBits(v1, 0, 4);
902 int32_t blue = (int32_t)getBits(v2, 0, 4);
903 int32_t scale = (int32_t)getBits(v3, 0, 4);
904
905 {
906 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
907 #define ASSIGN_X_BITS(V0, S0, V1, S1, V2, S2, V3, S3, V4, S4, V5, S5, V6, S6) \
908 do \
909 { \
910 SHOR(V0, S0, x0); \
911 SHOR(V1, S1, x1); \
912 SHOR(V2, S2, x2); \
913 SHOR(V3, S3, x3); \
914 SHOR(V4, S4, x4); \
915 SHOR(V5, S5, x5); \
916 SHOR(V6, S6, x6); \
917 } while (false)
918
919 const uint32_t x0 = getBit(v1, 6);
920 const uint32_t x1 = getBit(v1, 5);
921 const uint32_t x2 = getBit(v2, 6);
922 const uint32_t x3 = getBit(v2, 5);
923 const uint32_t x4 = getBit(v3, 7);
924 const uint32_t x5 = getBit(v3, 6);
925 const uint32_t x6 = getBit(v3, 5);
926
927 int32_t &R = red;
928 int32_t &G = green;
929 int32_t &B = blue;
930 int32_t &S = scale;
931
932 switch (mode)
933 {
934 case 0:
935 ASSIGN_X_BITS(R, 9, R, 8, R, 7, R, 10, R, 6, S, 6, S, 5);
936 break;
937 case 1:
938 ASSIGN_X_BITS(R, 8, G, 5, R, 7, B, 5, R, 6, R, 10, R, 9);
939 break;
940 case 2:
941 ASSIGN_X_BITS(R, 9, R, 8, R, 7, R, 6, S, 7, S, 6, S, 5);
942 break;
943 case 3:
944 ASSIGN_X_BITS(R, 8, G, 5, R, 7, B, 5, R, 6, S, 6, S, 5);
945 break;
946 case 4:
947 ASSIGN_X_BITS(G, 6, G, 5, B, 6, B, 5, R, 6, R, 7, S, 5);
948 break;
949 case 5:
950 ASSIGN_X_BITS(G, 6, G, 5, B, 6, B, 5, R, 6, S, 6, S, 5);
951 break;
952 default:
953 DE_ASSERT(false);
954 }
955
956 #undef ASSIGN_X_BITS
957 #undef SHOR
958 }
959
960 static const int shiftAmounts[] = {1, 1, 2, 3, 4, 5};
961 DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
962
963 red <<= shiftAmounts[mode];
964 green <<= shiftAmounts[mode];
965 blue <<= shiftAmounts[mode];
966 scale <<= shiftAmounts[mode];
967
968 if (mode != 5)
969 {
970 green = red - green;
971 blue = red - blue;
972 }
973
974 if (majComp == 1)
975 std::swap(red, green);
976 else if (majComp == 2)
977 std::swap(red, blue);
978
979 e0 = UVec4(de::clamp(red - scale, 0, 0xfff), de::clamp(green - scale, 0, 0xfff), de::clamp(blue - scale, 0, 0xfff),
980 0x780);
981
982 e1 = UVec4(de::clamp(red, 0, 0xfff), de::clamp(green, 0, 0xfff), de::clamp(blue, 0, 0xfff), 0x780);
983 }
984
decodeHDREndpointMode11(UVec4 & e0,UVec4 & e1,uint32_t v0,uint32_t v1,uint32_t v2,uint32_t v3,uint32_t v4,uint32_t v5)985 void decodeHDREndpointMode11(UVec4 &e0, UVec4 &e1, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4,
986 uint32_t v5)
987 {
988 const uint32_t major = (getBit(v5, 7) << 1) | getBit(v4, 7);
989
990 if (major == 3)
991 {
992 e0 = UVec4(v0 << 4, v2 << 4, getBits(v4, 0, 6) << 5, 0x780);
993 e1 = UVec4(v1 << 4, v3 << 4, getBits(v5, 0, 6) << 5, 0x780);
994 }
995 else
996 {
997 const uint32_t mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
998
999 int32_t a = (int32_t)((getBit(v1, 6) << 8) | v0);
1000 int32_t c = (int32_t)(getBits(v1, 0, 5));
1001 int32_t b0 = (int32_t)(getBits(v2, 0, 5));
1002 int32_t b1 = (int32_t)(getBits(v3, 0, 5));
1003 int32_t d0 = (int32_t)(getBits(v4, 0, 4));
1004 int32_t d1 = (int32_t)(getBits(v5, 0, 4));
1005
1006 {
1007 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
1008 #define ASSIGN_X_BITS(V0, S0, V1, S1, V2, S2, V3, S3, V4, S4, V5, S5) \
1009 do \
1010 { \
1011 SHOR(V0, S0, x0); \
1012 SHOR(V1, S1, x1); \
1013 SHOR(V2, S2, x2); \
1014 SHOR(V3, S3, x3); \
1015 SHOR(V4, S4, x4); \
1016 SHOR(V5, S5, x5); \
1017 } while (false)
1018
1019 const uint32_t x0 = getBit(v2, 6);
1020 const uint32_t x1 = getBit(v3, 6);
1021 const uint32_t x2 = getBit(v4, 6);
1022 const uint32_t x3 = getBit(v5, 6);
1023 const uint32_t x4 = getBit(v4, 5);
1024 const uint32_t x5 = getBit(v5, 5);
1025
1026 switch (mode)
1027 {
1028 case 0:
1029 ASSIGN_X_BITS(b0, 6, b1, 6, d0, 6, d1, 6, d0, 5, d1, 5);
1030 break;
1031 case 1:
1032 ASSIGN_X_BITS(b0, 6, b1, 6, b0, 7, b1, 7, d0, 5, d1, 5);
1033 break;
1034 case 2:
1035 ASSIGN_X_BITS(a, 9, c, 6, d0, 6, d1, 6, d0, 5, d1, 5);
1036 break;
1037 case 3:
1038 ASSIGN_X_BITS(b0, 6, b1, 6, a, 9, c, 6, d0, 5, d1, 5);
1039 break;
1040 case 4:
1041 ASSIGN_X_BITS(b0, 6, b1, 6, b0, 7, b1, 7, a, 9, a, 10);
1042 break;
1043 case 5:
1044 ASSIGN_X_BITS(a, 9, a, 10, c, 7, c, 6, d0, 5, d1, 5);
1045 break;
1046 case 6:
1047 ASSIGN_X_BITS(b0, 6, b1, 6, a, 11, c, 6, a, 9, a, 10);
1048 break;
1049 case 7:
1050 ASSIGN_X_BITS(a, 9, a, 10, a, 11, c, 6, d0, 5, d1, 5);
1051 break;
1052 default:
1053 DE_ASSERT(false);
1054 }
1055
1056 #undef ASSIGN_X_BITS
1057 #undef SHOR
1058 }
1059
1060 static const int numDBits[] = {7, 6, 7, 6, 5, 6, 5, 6};
1061 DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
1062
1063 d0 = signExtend(d0, numDBits[mode]);
1064 d1 = signExtend(d1, numDBits[mode]);
1065
1066 const int shiftAmount = (mode >> 1) ^ 3;
1067 a <<= shiftAmount;
1068 c <<= shiftAmount;
1069 b0 <<= shiftAmount;
1070 b1 <<= shiftAmount;
1071 d0 <<= shiftAmount;
1072 d1 <<= shiftAmount;
1073
1074 e0 = UVec4(de::clamp(a - c, 0, 0xfff), de::clamp(a - b0 - c - d0, 0, 0xfff),
1075 de::clamp(a - b1 - c - d1, 0, 0xfff), 0x780);
1076
1077 e1 = UVec4(de::clamp(a, 0, 0xfff), de::clamp(a - b0, 0, 0xfff), de::clamp(a - b1, 0, 0xfff), 0x780);
1078
1079 if (major == 1)
1080 {
1081 std::swap(e0.x(), e0.y());
1082 std::swap(e1.x(), e1.y());
1083 }
1084 else if (major == 2)
1085 {
1086 std::swap(e0.x(), e0.z());
1087 std::swap(e1.x(), e1.z());
1088 }
1089 }
1090 }
1091
decodeHDREndpointMode15(UVec4 & e0,UVec4 & e1,uint32_t v0,uint32_t v1,uint32_t v2,uint32_t v3,uint32_t v4,uint32_t v5,uint32_t v6In,uint32_t v7In)1092 void decodeHDREndpointMode15(UVec4 &e0, UVec4 &e1, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4,
1093 uint32_t v5, uint32_t v6In, uint32_t v7In)
1094 {
1095 decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
1096
1097 const uint32_t mode = (getBit(v7In, 7) << 1) | getBit(v6In, 7);
1098 int32_t v6 = (int32_t)getBits(v6In, 0, 6);
1099 int32_t v7 = (int32_t)getBits(v7In, 0, 6);
1100
1101 if (mode == 3)
1102 {
1103 e0.w() = v6 << 5;
1104 e1.w() = v7 << 5;
1105 }
1106 else
1107 {
1108 v6 |= (v7 << (mode + 1)) & 0x780;
1109 v7 &= (0x3f >> mode);
1110 v7 ^= 0x20 >> mode;
1111 v7 -= 0x20 >> mode;
1112 v6 <<= 4 - mode;
1113 v7 <<= 4 - mode;
1114
1115 v7 += v6;
1116 v7 = de::clamp(v7, 0, 0xfff);
1117 e0.w() = v6;
1118 e1.w() = v7;
1119 }
1120 }
1121
// Converts unquantized endpoint values into one endpoint pair per partition.
//
// dst                  - receives numPartitions endpoint pairs (e0, e1).
// unquantizedEndpoints - values for all partitions, stored back to back; each
//                        partition consumes computeNumColorEndpointValues(mode)
//                        entries.
// endpointModes        - endpoint mode (0..15) of every partition.
// numPartitions        - number of partitions to decode.
//
// Modes 2, 3, 7, 11, 14 and 15 produce color components of up to 12 bits;
// the other modes produce 8-bit components.
void decodeColorEndpoints(ColorEndpointPair *dst, const uint32_t *unquantizedEndpoints, const uint32_t *endpointModes,
                          int numPartitions)
{
    int unquantizedNdx = 0;

    for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
    {
        const uint32_t endpointMode = endpointModes[partitionNdx];
        // v points at the first value that belongs to this partition.
        const uint32_t *v = &unquantizedEndpoints[unquantizedNdx];
        UVec4 &e0         = dst[partitionNdx].e0;
        UVec4 &e1         = dst[partitionNdx].e1;

        // Advance past this partition's values for the next iteration.
        unquantizedNdx += computeNumColorEndpointValues(endpointMode);

        switch (endpointMode)
        {
        // One value per endpoint, replicated to R, G and B; alpha is 0xff.
        case 0:
            e0 = UVec4(v[0], v[0], v[0], 0xff);
            e1 = UVec4(v[1], v[1], v[1], 0xff);
            break;

        // Base value plus a 6-bit offset; the second endpoint saturates at 0xff.
        case 1:
        {
            const uint32_t L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
            const uint32_t L1 = de::min(0xffu, L0 + getBits(v[1], 0, 5));
            e0                = UVec4(L0, L0, L0, 0xff);
            e1                = UVec4(L1, L1, L1, 0xff);
            break;
        }

        // Two values scaled by 16; when v[1] < v[0] they are swapped and
        // biased by +8 / -8. Alpha is fixed at 0x780.
        case 2:
        {
            const uint32_t v1Gr = v[1] >= v[0];
            const uint32_t y0   = v1Gr ? v[0] << 4 : (v[1] << 4) + 8;
            const uint32_t y1   = v1Gr ? v[1] << 4 : (v[0] << 4) - 8;

            e0 = UVec4(y0, y0, y0, 0x780);
            e1 = UVec4(y1, y1, y1, 0x780);
            break;
        }

        // Bit 7 of v[0] selects between two base/offset bit layouts; the
        // second endpoint saturates at 0xfff. Alpha is fixed at 0x780.
        case 3:
        {
            const bool m      = isBitSet(v[0], 7);
            const uint32_t y0 = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2) :
                                    (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
            const uint32_t d  = m ? getBits(v[1], 0, 4) << 2 : getBits(v[1], 0, 3) << 1;
            const uint32_t y1 = de::min(0xfffu, y0 + d);

            e0 = UVec4(y0, y0, y0, 0x780);
            e1 = UVec4(y1, y1, y1, 0x780);
            break;
        }

        // Like mode 0, with alpha taken from v[2] and v[3].
        case 4:
            e0 = UVec4(v[0], v[0], v[0], v[2]);
            e1 = UVec4(v[1], v[1], v[1], v[3]);
            break;

        // Base + signed offset for the replicated color value and for alpha;
        // results are clamped to [0, 0xff].
        case 5:
        {
            int32_t v0 = (int32_t)v[0];
            int32_t v1 = (int32_t)v[1];
            int32_t v2 = (int32_t)v[2];
            int32_t v3 = (int32_t)v[3];
            bitTransferSigned(v1, v0);
            bitTransferSigned(v3, v2);

            e0 = clampedRGBA(IVec4(v0, v0, v0, v2));
            e1 = clampedRGBA(IVec4(v0 + v1, v0 + v1, v0 + v1, v2 + v3));
            break;
        }

        // e1 is the RGB triple as stored; e0 is the same triple scaled by v[3] / 256.
        case 6:
            e0 = UVec4((v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8, 0xff);
            e1 = UVec4(v[0], v[1], v[2], 0xff);
            break;

        case 7:
            decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
            break;

        // Two RGB triples. If the sum of the odd-indexed values is smaller
        // than the sum of the even-indexed ones, the endpoints are swapped
        // and passed through blueContract().
        case 8:
            if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4])
            {
                e0 = UVec4(v[0], v[2], v[4], 0xff);
                e1 = UVec4(v[1], v[3], v[5], 0xff);
            }
            else
            {
                e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
                e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
            }
            break;

        // RGB base + signed offsets. A negative offset sum swaps the
        // endpoints and applies blueContract().
        case 9:
        {
            int32_t v0 = (int32_t)v[0];
            int32_t v1 = (int32_t)v[1];
            int32_t v2 = (int32_t)v[2];
            int32_t v3 = (int32_t)v[3];
            int32_t v4 = (int32_t)v[4];
            int32_t v5 = (int32_t)v[5];
            bitTransferSigned(v1, v0);
            bitTransferSigned(v3, v2);
            bitTransferSigned(v5, v4);

            if (v1 + v3 + v5 >= 0)
            {
                e0 = clampedRGBA(IVec4(v0, v2, v4, 0xff));
                e1 = clampedRGBA(IVec4(v0 + v1, v2 + v3, v4 + v5, 0xff));
            }
            else
            {
                e0 = clampedRGBA(blueContract(v0 + v1, v2 + v3, v4 + v5, 0xff));
                e1 = clampedRGBA(blueContract(v0, v2, v4, 0xff));
            }
            break;
        }

        // Like mode 6, with alpha taken from v[4] and v[5].
        case 10:
            e0 = UVec4((v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8, v[4]);
            e1 = UVec4(v[0], v[1], v[2], v[5]);
            break;

        case 11:
            decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
            break;

        // Like mode 8, with alpha taken from v[6] and v[7] (swapped along with the colors).
        case 12:
            if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4])
            {
                e0 = UVec4(v[0], v[2], v[4], v[6]);
                e1 = UVec4(v[1], v[3], v[5], v[7]);
            }
            else
            {
                e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
                e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
            }
            break;

        // Like mode 9, with a base + signed offset alpha in v[6] / v[7].
        // Only the RGB offsets take part in the swap decision.
        case 13:
        {
            int32_t v0 = (int32_t)v[0];
            int32_t v1 = (int32_t)v[1];
            int32_t v2 = (int32_t)v[2];
            int32_t v3 = (int32_t)v[3];
            int32_t v4 = (int32_t)v[4];
            int32_t v5 = (int32_t)v[5];
            int32_t v6 = (int32_t)v[6];
            int32_t v7 = (int32_t)v[7];
            bitTransferSigned(v1, v0);
            bitTransferSigned(v3, v2);
            bitTransferSigned(v5, v4);
            bitTransferSigned(v7, v6);

            if (v1 + v3 + v5 >= 0)
            {
                e0 = clampedRGBA(IVec4(v0, v2, v4, v6));
                e1 = clampedRGBA(IVec4(v0 + v1, v2 + v3, v4 + v5, v6 + v7));
            }
            else
            {
                e0 = clampedRGBA(blueContract(v0 + v1, v2 + v3, v4 + v5, v6 + v7));
                e1 = clampedRGBA(blueContract(v0, v2, v4, v6));
            }

            break;
        }

        // Mode 11 color with alpha overwritten by the raw values v[6] and v[7].
        case 14:
            decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
            e0.w() = v[6];
            e1.w() = v[7];
            break;

        case 15:
            decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
            break;

        default:
            DE_ASSERT(false);
        }
    }
}
1308
computeColorEndpoints(ColorEndpointPair * dst,const Block128 & blockData,const uint32_t * endpointModes,int numPartitions,int numColorEndpointValues,const ISEParams & iseParams,int numBitsAvailable)1309 void computeColorEndpoints(ColorEndpointPair *dst, const Block128 &blockData, const uint32_t *endpointModes,
1310 int numPartitions, int numColorEndpointValues, const ISEParams &iseParams,
1311 int numBitsAvailable)
1312 {
1313 const int colorEndpointDataStart = numPartitions == 1 ? 17 : 29;
1314 ISEDecodedResult colorEndpointData[18];
1315
1316 {
1317 BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
1318 decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
1319 }
1320
1321 {
1322 uint32_t unquantizedEndpoints[18];
1323 unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
1324 decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
1325 }
1326 }
1327
unquantizeWeights(uint32_t dst[64],const ISEDecodedResult * weightGrid,const ASTCBlockMode & blockMode)1328 void unquantizeWeights(uint32_t dst[64], const ISEDecodedResult *weightGrid, const ASTCBlockMode &blockMode)
1329 {
1330 const int numWeights = computeNumWeights(blockMode);
1331 const ISEParams &iseParams = blockMode.weightISEParams;
1332
1333 if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
1334 {
1335 const int rangeCase = iseParams.numBits * 2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
1336
1337 if (rangeCase == 0 || rangeCase == 1)
1338 {
1339 static const uint32_t map0[3] = {0, 32, 63};
1340 static const uint32_t map1[5] = {0, 16, 32, 47, 63};
1341 const uint32_t *const map = rangeCase == 0 ? &map0[0] : &map1[0];
1342 for (int i = 0; i < numWeights; i++)
1343 {
1344 DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
1345 dst[i] = map[weightGrid[i].v];
1346 }
1347 }
1348 else
1349 {
1350 DE_ASSERT(rangeCase <= 6);
1351 static const uint32_t Ca[5] = {50, 28, 23, 13, 11};
1352 const uint32_t C = Ca[rangeCase - 2];
1353
1354 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1355 {
1356 const uint32_t a = getBit(weightGrid[weightNdx].m, 0);
1357 const uint32_t b = getBit(weightGrid[weightNdx].m, 1);
1358 const uint32_t c = getBit(weightGrid[weightNdx].m, 2);
1359
1360 const uint32_t A = a == 0 ? 0 : (1 << 7) - 1;
1361 const uint32_t B = rangeCase == 2 ? 0 :
1362 rangeCase == 3 ? 0 :
1363 rangeCase == 4 ? (b << 6) | (b << 2) | (b << 0) :
1364 rangeCase == 5 ? (b << 6) | (b << 1) :
1365 rangeCase == 6 ? (c << 6) | (b << 5) | (c << 1) | (b << 0) :
1366 (uint32_t)-1;
1367
1368 dst[weightNdx] = (((weightGrid[weightNdx].tq * C + B) ^ A) >> 2) | (A & 0x20);
1369 }
1370 }
1371 }
1372 else
1373 {
1374 DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
1375
1376 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1377 dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
1378 }
1379
1380 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1381 dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
1382
1383 // Initialize nonexistent weights to poison values
1384 for (int weightNdx = numWeights; weightNdx < 64; weightNdx++)
1385 dst[weightNdx] = ~0u;
1386 }
1387
interpolateWeights(TexelWeightPair * dst,const uint32_t (& unquantizedWeights)[64],int blockWidth,int blockHeight,const ASTCBlockMode & blockMode)1388 void interpolateWeights(TexelWeightPair *dst, const uint32_t (&unquantizedWeights)[64], int blockWidth, int blockHeight,
1389 const ASTCBlockMode &blockMode)
1390 {
1391 const int numWeightsPerTexel = blockMode.isDualPlane ? 2 : 1;
1392 const uint32_t scaleX = (1024 + blockWidth / 2) / (blockWidth - 1);
1393 const uint32_t scaleY = (1024 + blockHeight / 2) / (blockHeight - 1);
1394
1395 DE_ASSERT(blockMode.weightGridWidth * blockMode.weightGridHeight * numWeightsPerTexel <=
1396 DE_LENGTH_OF_ARRAY(unquantizedWeights));
1397
1398 for (int texelY = 0; texelY < blockHeight; texelY++)
1399 {
1400 for (int texelX = 0; texelX < blockWidth; texelX++)
1401 {
1402 const uint32_t gX = (scaleX * texelX * (blockMode.weightGridWidth - 1) + 32) >> 6;
1403 const uint32_t gY = (scaleY * texelY * (blockMode.weightGridHeight - 1) + 32) >> 6;
1404 const uint32_t jX = gX >> 4;
1405 const uint32_t jY = gY >> 4;
1406 const uint32_t fX = gX & 0xf;
1407 const uint32_t fY = gY & 0xf;
1408
1409 const uint32_t w11 = (fX * fY + 8) >> 4;
1410 const uint32_t w10 = fY - w11;
1411 const uint32_t w01 = fX - w11;
1412 const uint32_t w00 = 16 - fX - fY + w11;
1413
1414 const uint32_t i00 = jY * blockMode.weightGridWidth + jX;
1415 const uint32_t i01 = i00 + 1;
1416 const uint32_t i10 = i00 + blockMode.weightGridWidth;
1417 const uint32_t i11 = i00 + blockMode.weightGridWidth + 1;
1418
1419 // These addresses can be out of bounds, but respective weights will be 0 then.
1420 DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w00 == 0);
1421 DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w01 == 0);
1422 DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w10 == 0);
1423 DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth * blockMode.weightGridHeight) || w11 == 0);
1424
1425 for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
1426 {
1427 // & 0x3f clamps address to bounds of unquantizedWeights
1428 const uint32_t p00 = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1429 const uint32_t p01 = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1430 const uint32_t p10 = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1431 const uint32_t p11 = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1432
1433 dst[texelY * blockWidth + texelX].w[texelWeightNdx] =
1434 (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
1435 }
1436 }
1437 }
1438 }
1439
computeTexelWeights(TexelWeightPair * dst,const Block128 & blockData,int blockWidth,int blockHeight,const ASTCBlockMode & blockMode)1440 void computeTexelWeights(TexelWeightPair *dst, const Block128 &blockData, int blockWidth, int blockHeight,
1441 const ASTCBlockMode &blockMode)
1442 {
1443 ISEDecodedResult weightGrid[64];
1444
1445 {
1446 BitAccessStream dataStream(
1447 blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
1448 decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
1449 }
1450
1451 {
1452 uint32_t unquantizedWeights[64];
1453 unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
1454 interpolateWeights(dst, unquantizedWeights, blockWidth, blockHeight, blockMode);
1455 }
1456 }
1457
// 32-bit integer mixing function used to derive partition seeds.
// All arithmetic wraps modulo 2^32.
inline uint32_t hash52(uint32_t v)
{
    uint32_t h = v;

    h = h ^ (h >> 15);
    h = h * 0xFFFE0001u; // h - (h << 17)
    h = h * 129u;        // h + (h << 7)
    h = h * 17u;         // h + (h << 4)
    h = h ^ (h >> 5);
    h = h * 65537u;      // h + (h << 16)
    h = h ^ (h >> 7);
    h = h ^ (h >> 3);
    h = h ^ (h << 6);
    h = h ^ (h >> 17);

    return h;
}
1473
computeTexelPartition(uint32_t seedIn,uint32_t xIn,uint32_t yIn,uint32_t zIn,int numPartitions,bool smallBlock)1474 int computeTexelPartition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int numPartitions, bool smallBlock)
1475 {
1476 DE_ASSERT(zIn == 0);
1477 const uint32_t x = smallBlock ? xIn << 1 : xIn;
1478 const uint32_t y = smallBlock ? yIn << 1 : yIn;
1479 const uint32_t z = smallBlock ? zIn << 1 : zIn;
1480 const uint32_t seed = seedIn + 1024 * (numPartitions - 1);
1481 const uint32_t rnum = hash52(seed);
1482 uint8_t seed1 = (uint8_t)(rnum & 0xf);
1483 uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
1484 uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
1485 uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
1486 uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
1487 uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
1488 uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
1489 uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
1490 uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
1491 uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
1492 uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
1493 uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
1494
1495 seed1 = (uint8_t)(seed1 * seed1);
1496 seed2 = (uint8_t)(seed2 * seed2);
1497 seed3 = (uint8_t)(seed3 * seed3);
1498 seed4 = (uint8_t)(seed4 * seed4);
1499 seed5 = (uint8_t)(seed5 * seed5);
1500 seed6 = (uint8_t)(seed6 * seed6);
1501 seed7 = (uint8_t)(seed7 * seed7);
1502 seed8 = (uint8_t)(seed8 * seed8);
1503 seed9 = (uint8_t)(seed9 * seed9);
1504 seed10 = (uint8_t)(seed10 * seed10);
1505 seed11 = (uint8_t)(seed11 * seed11);
1506 seed12 = (uint8_t)(seed12 * seed12);
1507
1508 const int shA = (seed & 2) != 0 ? 4 : 5;
1509 const int shB = numPartitions == 3 ? 6 : 5;
1510 const int sh1 = (seed & 1) != 0 ? shA : shB;
1511 const int sh2 = (seed & 1) != 0 ? shB : shA;
1512 const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
1513
1514 seed1 = (uint8_t)(seed1 >> sh1);
1515 seed2 = (uint8_t)(seed2 >> sh2);
1516 seed3 = (uint8_t)(seed3 >> sh1);
1517 seed4 = (uint8_t)(seed4 >> sh2);
1518 seed5 = (uint8_t)(seed5 >> sh1);
1519 seed6 = (uint8_t)(seed6 >> sh2);
1520 seed7 = (uint8_t)(seed7 >> sh1);
1521 seed8 = (uint8_t)(seed8 >> sh2);
1522 seed9 = (uint8_t)(seed9 >> sh3);
1523 seed10 = (uint8_t)(seed10 >> sh3);
1524 seed11 = (uint8_t)(seed11 >> sh3);
1525 seed12 = (uint8_t)(seed12 >> sh3);
1526
1527 const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
1528 const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
1529 const int c = numPartitions >= 3 ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
1530 const int d = numPartitions >= 4 ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
1531
1532 return a >= b && a >= c && a >= d ? 0 : b >= c && b >= d ? 1 : c >= d ? 2 : 3;
1533 }
1534
// Produces the final color of every texel in the block by interpolating
// between the endpoints of the texel's partition.
//
// dst                - output, 4 components per texel in row-major order;
//                      written as uint8_t when isSRGB is set, as float otherwise.
// colorEndpoints     - endpoint pair of each partition.
// texelWeights       - interpolation weights of each texel.
// ccs                - channel index that uses the second weight (w[1]); with
//                      a value that matches no channel (callers pass -1), all
//                      channels use w[0].
// partitionIndexSeed - seed passed to computeTexelPartition(); unused when
//                      numPartitions is 1.
// colorEndpointModes - endpoint mode of each partition.
//
// Returns DECOMPRESS_RESULT_ERROR if any texel hits an HDR endpoint while
// isLDRMode is set (such texels get the error color, magenta), otherwise
// DECOMPRESS_RESULT_VALID_BLOCK.
DecompressResult setTexelColors(void *dst, ColorEndpointPair *colorEndpoints, TexelWeightPair *texelWeights, int ccs,
                                uint32_t partitionIndexSeed, int numPartitions, int blockWidth, int blockHeight,
                                bool isSRGB, bool isLDRMode, const uint32_t *colorEndpointModes)
{
    // Blocks with fewer than 31 texels use the "small block" partition function variant.
    const bool smallBlock   = blockWidth * blockHeight < 31;
    DecompressResult result = DECOMPRESS_RESULT_VALID_BLOCK;
    bool isHDREndpoint[4];

    // Only the first numPartitions entries are initialized and used.
    for (int i = 0; i < numPartitions; i++)
        isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);

    for (int texelY = 0; texelY < blockHeight; texelY++)
        for (int texelX = 0; texelX < blockWidth; texelX++)
        {
            const int texelNdx         = texelY * blockWidth + texelX;
            const int colorEndpointNdx = numPartitions == 1 ? 0 :
                                                              computeTexelPartition(partitionIndexSeed, texelX, texelY,
                                                                                    0, numPartitions, smallBlock);
            DE_ASSERT(colorEndpointNdx < numPartitions);
            const UVec4 &e0               = colorEndpoints[colorEndpointNdx].e0;
            const UVec4 &e1               = colorEndpoints[colorEndpointNdx].e1;
            const TexelWeightPair &weight = texelWeights[texelNdx];

            if (isLDRMode && isHDREndpoint[colorEndpointNdx])
            {
                // HDR endpoint in LDR mode: write opaque magenta and flag the block as erroneous.
                if (isSRGB)
                {
                    ((uint8_t *)dst)[texelNdx * 4 + 0] = 0xff;
                    ((uint8_t *)dst)[texelNdx * 4 + 1] = 0;
                    ((uint8_t *)dst)[texelNdx * 4 + 2] = 0xff;
                    ((uint8_t *)dst)[texelNdx * 4 + 3] = 0xff;
                }
                else
                {
                    ((float *)dst)[texelNdx * 4 + 0] = 1.0f;
                    ((float *)dst)[texelNdx * 4 + 1] = 0;
                    ((float *)dst)[texelNdx * 4 + 2] = 1.0f;
                    ((float *)dst)[texelNdx * 4 + 3] = 1.0f;
                }

                result = DECOMPRESS_RESULT_ERROR;
            }
            else
            {
                for (int channelNdx = 0; channelNdx < 4; channelNdx++)
                {
                    if (!isHDREndpoint[colorEndpointNdx] ||
                        (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] ==
                                                14)) // \note Alpha for mode 14 is treated the same as LDR.
                    {
                        // LDR path: expand the 8-bit endpoints to 16 bits (low byte is 0x80 for
                        // sRGB, a copy of the high byte otherwise), then blend with a weight out of 64.
                        const uint32_t c0 = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
                        const uint32_t c1 = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
                        const uint32_t w  = weight.w[ccs == channelNdx ? 1 : 0];
                        const uint32_t c  = (c0 * (64 - w) + c1 * w + 32) / 64;

                        // sRGB output keeps the top 8 bits; float output maps 65535 to exactly 1.0.
                        if (isSRGB)
                            ((uint8_t *)dst)[texelNdx * 4 + channelNdx] = (uint8_t)((c & 0xff00) >> 8);
                        else
                            ((float *)dst)[texelNdx * 4 + channelNdx] = c == 65535 ? 1.0f : (float)c / 65536.0f;
                    }
                    else
                    {
                        // HDR path: blend the 12-bit endpoints expanded to 16 bits, then build a
                        // 16-bit float from a 5-bit exponent (e) and a remapped mantissa (mt).
                        DE_STATIC_ASSERT((de::meta::TypesSame<deFloat16, uint16_t>::Value));
                        const uint32_t c0  = e0[channelNdx] << 4;
                        const uint32_t c1  = e1[channelNdx] << 4;
                        const uint32_t w   = weight.w[ccs == channelNdx ? 1 : 0];
                        const uint32_t c   = (c0 * (64 - w) + c1 * w + 32) / 64;
                        const uint32_t e   = getBits(c, 11, 15);
                        const uint32_t m   = getBits(c, 0, 10);
                        const uint32_t mt  = m < 512 ? 3 * m : m >= 1536 ? 5 * m - 2048 : 4 * m - 512;
                        const deFloat16 cf = (deFloat16)((e << 10) + (mt >> 3));

                        // Inf and NaN results are replaced by 0x7bff before conversion to float.
                        ((float *)dst)[texelNdx * 4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
                    }
                }
            }
        }

    return result;
}
1615
// Decompresses one 128-bit ASTC block into dst.
//
// dst receives blockWidth * blockHeight texels with 4 components each,
// written as uint8_t when isSRGB is set and as float otherwise. isSRGB
// requires isLDR.
//
// Returns DECOMPRESS_RESULT_ERROR when the block is invalid (dst is then
// filled with the error color), otherwise the result of the void-extent
// decode or of setTexelColors().
DecompressResult decompressBlock(void *dst, const Block128 &blockData, int blockWidth, int blockHeight, bool isSRGB,
                                 bool isLDR)
{
    DE_ASSERT(isLDR || !isSRGB);

    // Decode block mode.

    const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));

    // Check for block mode errors.

    if (blockMode.isError)
    {
        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
        return DECOMPRESS_RESULT_ERROR;
    }

    // Separate path for void-extent.

    if (blockMode.isVoidExtent)
        return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);

    // Compute weight grid values.

    const int numWeights        = computeNumWeights(blockMode);
    const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
    // Bits 11..12 store the partition count minus one.
    const int numPartitions = (int)blockData.getBits(11, 12) + 1;

    // Check for errors in weight grid, partition and dual-plane parameters.

    if (numWeights > 64 || numWeightDataBits > 96 || numWeightDataBits < 24 || blockMode.weightGridWidth > blockWidth ||
        blockMode.weightGridHeight > blockHeight || (numPartitions == 4 && blockMode.isDualPlane))
    {
        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
        return DECOMPRESS_RESULT_ERROR;
    }

    // Compute number of bits available for color endpoint data.

    // All partitions share one endpoint mode when there is a single partition
    // or when bits 23..24 are zero.
    const bool isSingleUniqueCem = numPartitions == 1 || blockData.getBits(23, 24) == 0;
    // Config bits: 17 or 29 fixed bits (25 + 3 per partition when modes differ),
    // plus 2 bits for the dual-plane channel selector.
    const int numConfigDataBits         = (numPartitions == 1 ? 17 :
                                           isSingleUniqueCem  ? 29 :
                                                                25 + 3 * numPartitions) +
                                  (blockMode.isDualPlane ? 2 : 0);
    const int numBitsForColorEndpoints = 128 - numWeightDataBits - numConfigDataBits;
    // Position just below the weight data, lowered further by the extra
    // endpoint-mode bits when the partitions use different modes.
    const int extraCemBitsStart = 127 - numWeightDataBits -
                                  (isSingleUniqueCem  ? -1 :
                                   numPartitions == 4 ? 7 :
                                   numPartitions == 3 ? 4 :
                                   numPartitions == 2 ? 1 :
                                                        0);
    // Decode color endpoint modes.

    uint32_t colorEndpointModes[4];
    decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);

    const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);

    // Check for errors in color endpoint value count.

    // More than 18 values, or fewer than ceil(13 * values / 5) bits to store them, is an error.
    if (numColorEndpointValues > 18 || numBitsForColorEndpoints < deDivRoundUp32(13 * numColorEndpointValues, 5))
    {
        setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
        return DECOMPRESS_RESULT_ERROR;
    }

    // Compute color endpoints.

    ColorEndpointPair colorEndpoints[4];
    computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
                          computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues),
                          numBitsForColorEndpoints);

    // Compute texel weights.

    TexelWeightPair texelWeights[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT];
    computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);

    // Set texel colors.

    // ccs is the 2-bit channel selector stored just below extraCemBitsStart; -1 when not dual-plane.
    const int ccs = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart - 2, extraCemBitsStart - 1) : -1;
    // The partition seed is in bits 13..22; it is not used for single-partition blocks.
    const uint32_t partitionIndexSeed = numPartitions > 1 ? blockData.getBits(13, 22) : (uint32_t)-1;

    return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth,
                          blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
}
1702
decompress(const PixelBufferAccess & dst,const uint8_t * data,bool isSRGB,bool isLDR)1703 void decompress(const PixelBufferAccess &dst, const uint8_t *data, bool isSRGB, bool isLDR)
1704 {
1705 DE_ASSERT(isLDR || !isSRGB);
1706
1707 const int blockWidth = dst.getWidth();
1708 const int blockHeight = dst.getHeight();
1709
1710 union
1711 {
1712 uint8_t sRGB[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
1713 float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
1714 } decompressedBuffer;
1715
1716 const Block128 blockData(data);
1717 decompressBlock(isSRGB ? (void *)&decompressedBuffer.sRGB[0] : (void *)&decompressedBuffer.linear[0], blockData,
1718 dst.getWidth(), dst.getHeight(), isSRGB, isLDR);
1719
1720 if (isSRGB)
1721 {
1722 for (int i = 0; i < blockHeight; i++)
1723 for (int j = 0; j < blockWidth; j++)
1724 {
1725 dst.setPixel(IVec4(decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 0],
1726 decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 1],
1727 decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 2],
1728 decompressedBuffer.sRGB[(i * blockWidth + j) * 4 + 3]),
1729 j, i);
1730 }
1731 }
1732 else
1733 {
1734 for (int i = 0; i < blockHeight; i++)
1735 for (int j = 0; j < blockWidth; j++)
1736 {
1737 dst.setPixel(Vec4(decompressedBuffer.linear[(i * blockWidth + j) * 4 + 0],
1738 decompressedBuffer.linear[(i * blockWidth + j) * 4 + 1],
1739 decompressedBuffer.linear[(i * blockWidth + j) * 4 + 2],
1740 decompressedBuffer.linear[(i * blockWidth + j) * 4 + 3]),
1741 j, i);
1742 }
1743 }
1744 }
1745
1746 // Helper class for setting bits in a 128-bit block.
1747 class AssignBlock128
1748 {
1749 private:
1750 typedef uint64_t Word;
1751
1752 enum
1753 {
1754 WORD_BYTES = sizeof(Word),
1755 WORD_BITS = 8 * WORD_BYTES,
1756 NUM_WORDS = 128 / WORD_BITS
1757 };
1758
1759 DE_STATIC_ASSERT(128 % WORD_BITS == 0);
1760
1761 public:
AssignBlock128(void)1762 AssignBlock128(void)
1763 {
1764 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1765 m_words[wordNdx] = 0;
1766 }
1767
setBit(int ndx,uint32_t val)1768 void setBit(int ndx, uint32_t val)
1769 {
1770 DE_ASSERT(de::inBounds(ndx, 0, 128));
1771 DE_ASSERT((val & 1) == val);
1772 const int wordNdx = ndx / WORD_BITS;
1773 const int bitNdx = ndx % WORD_BITS;
1774 m_words[wordNdx] = (m_words[wordNdx] & ~((Word)1 << bitNdx)) | ((Word)val << bitNdx);
1775 }
1776
setBits(int low,int high,uint32_t bits)1777 void setBits(int low, int high, uint32_t bits)
1778 {
1779 DE_ASSERT(de::inBounds(low, 0, 128));
1780 DE_ASSERT(de::inBounds(high, 0, 128));
1781 DE_ASSERT(de::inRange(high - low + 1, 0, 32));
1782 DE_ASSERT((bits & (((Word)1 << (high - low + 1)) - 1)) == bits);
1783
1784 if (high - low + 1 == 0)
1785 return;
1786
1787 const int word0Ndx = low / WORD_BITS;
1788 const int word1Ndx = high / WORD_BITS;
1789 const int lowNdxInW0 = low % WORD_BITS;
1790
1791 if (word0Ndx == word1Ndx)
1792 m_words[word0Ndx] =
1793 (m_words[word0Ndx] & ~((((Word)1 << (high - low + 1)) - 1) << lowNdxInW0)) | ((Word)bits << lowNdxInW0);
1794 else
1795 {
1796 DE_ASSERT(word1Ndx == word0Ndx + 1);
1797
1798 const int highNdxInW1 = high % WORD_BITS;
1799 const int numBitsToSetInW0 = WORD_BITS - lowNdxInW0;
1800 const Word bitsLowMask = ((Word)1 << numBitsToSetInW0) - 1;
1801
1802 m_words[word0Ndx] =
1803 (m_words[word0Ndx] & (((Word)1 << lowNdxInW0) - 1)) | (((Word)bits & bitsLowMask) << lowNdxInW0);
1804 m_words[word1Ndx] = (m_words[word1Ndx] & ~(((Word)1 << (highNdxInW1 + 1)) - 1)) |
1805 (((Word)bits & ~bitsLowMask) >> numBitsToSetInW0);
1806 }
1807 }
1808
assignToMemory(uint8_t * dst) const1809 void assignToMemory(uint8_t *dst) const
1810 {
1811 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1812 {
1813 for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
1814 dst[wordNdx * WORD_BYTES + byteNdx] = (uint8_t)((m_words[wordNdx] >> (8 * byteNdx)) & 0xff);
1815 }
1816 }
1817
pushBytesToVector(vector<uint8_t> & dst) const1818 void pushBytesToVector(vector<uint8_t> &dst) const
1819 {
1820 const int assignStartIndex = (int)dst.size();
1821 dst.resize(dst.size() + BLOCK_SIZE_BYTES);
1822 assignToMemory(&dst[assignStartIndex]);
1823 }
1824
1825 private:
1826 Word m_words[NUM_WORDS];
1827 };
1828
1829 // A helper for sequential access into a AssignBlock128.
1830 class BitAssignAccessStream
1831 {
1832 public:
BitAssignAccessStream(AssignBlock128 & dst,int startNdxInSrc,int length,bool forward)1833 BitAssignAccessStream(AssignBlock128 &dst, int startNdxInSrc, int length, bool forward)
1834 : m_dst(dst)
1835 , m_startNdxInSrc(startNdxInSrc)
1836 , m_length(length)
1837 , m_forward(forward)
1838 , m_ndx(0)
1839 {
1840 }
1841
1842 // Set the next num bits. Bits at positions greater than or equal to m_length are not touched.
setNext(int num,uint32_t bits)1843 void setNext(int num, uint32_t bits)
1844 {
1845 DE_ASSERT((bits & (((uint64_t)1 << num) - 1)) == bits);
1846
1847 if (num == 0 || m_ndx >= m_length)
1848 return;
1849
1850 const int end = m_ndx + num;
1851 const int numBitsToDst = de::max(0, de::min(m_length, end) - m_ndx);
1852 const int low = m_ndx;
1853 const int high = m_ndx + numBitsToDst - 1;
1854 const uint32_t actualBits = getBits(bits, 0, numBitsToDst - 1);
1855
1856 m_ndx += num;
1857
1858 return m_forward ?
1859 m_dst.setBits(m_startNdxInSrc + low, m_startNdxInSrc + high, actualBits) :
1860 m_dst.setBits(m_startNdxInSrc - high, m_startNdxInSrc - low, reverseBits(actualBits, numBitsToDst));
1861 }
1862
1863 private:
1864 AssignBlock128 &m_dst;
1865 const int m_startNdxInSrc;
1866 const int m_length;
1867 const bool m_forward;
1868
1869 int m_ndx;
1870 };
1871
1872 struct VoidExtentParams
1873 {
1874 DE_STATIC_ASSERT((de::meta::TypesSame<deFloat16, uint16_t>::Value));
1875 bool isHDR;
1876 uint16_t r;
1877 uint16_t g;
1878 uint16_t b;
1879 uint16_t a;
1880 // \note Currently extent coordinates are all set to all-ones.
1881
VoidExtentParamstcu::astc::__anone0fc7f7b0111::VoidExtentParams1882 VoidExtentParams(bool isHDR_, uint16_t r_, uint16_t g_, uint16_t b_, uint16_t a_)
1883 : isHDR(isHDR_)
1884 , r(r_)
1885 , g(g_)
1886 , b(b_)
1887 , a(a_)
1888 {
1889 }
1890 };
1891
generateVoidExtentBlock(const VoidExtentParams & params)1892 static AssignBlock128 generateVoidExtentBlock(const VoidExtentParams ¶ms)
1893 {
1894 AssignBlock128 block;
1895
1896 block.setBits(0, 8, 0x1fc); // \note Marks void-extent block.
1897 block.setBit(9, params.isHDR);
1898 block.setBits(10, 11, 3); // \note Spec shows that these bits are both set, although they serve no purpose.
1899
1900 // Extent coordinates - currently all-ones.
1901 block.setBits(12, 24, 0x1fff);
1902 block.setBits(25, 37, 0x1fff);
1903 block.setBits(38, 50, 0x1fff);
1904 block.setBits(51, 63, 0x1fff);
1905
1906 DE_ASSERT(!params.isHDR || (!isFloat16InfOrNan(params.r) && !isFloat16InfOrNan(params.g) &&
1907 !isFloat16InfOrNan(params.b) && !isFloat16InfOrNan(params.a)));
1908
1909 block.setBits(64, 79, params.r);
1910 block.setBits(80, 95, params.g);
1911 block.setBits(96, 111, params.b);
1912 block.setBits(112, 127, params.a);
1913
1914 return block;
1915 }
1916
// ISE (integer sequence encoding) inputs for one whole ASTC block.
//
// Values are supplied in one of two forms, selected by isGivenInBlockForm:
//  - plain: one value per element, each in the range [0, maximumValueOfISERange];
//  - block: explicit per-ISE-block specification (T or Q value plus the raw bit parts).
// The block form exists so that every possible T and Q value can be tested, since several
// T or Q values may decode to the same set of values.
struct ISEInput
{
    struct Block
    {
        uint32_t tOrQValue;    //!< The 8-bit T or 7-bit Q in a trit or quint ISE block.
        uint32_t bitValues[5]; //!< Bit parts of the values in the ISE block.
    };

    bool isGivenInBlockForm; //!< Selects which member of value is meaningful.

    // \note 64 comes from the maximum number of weight values in an ASTC block.
    union
    {
        uint32_t plain[64];
        Block block[64];
    } value;

    // \note Only the form selector is initialized; the contents of value are left unset.
    ISEInput() : isGivenInBlockForm(false)
    {
    }
};
1941
computeISERangeMax(const ISEParams & iseParams)1942 static inline uint32_t computeISERangeMax(const ISEParams &iseParams)
1943 {
1944 switch (iseParams.mode)
1945 {
1946 case ISEMODE_TRIT:
1947 return (1u << iseParams.numBits) * 3 - 1;
1948 case ISEMODE_QUINT:
1949 return (1u << iseParams.numBits) * 5 - 1;
1950 case ISEMODE_PLAIN_BIT:
1951 return (1u << iseParams.numBits) - 1;
1952 default:
1953 DE_ASSERT(false);
1954 return -1;
1955 }
1956 }
1957
1958 struct NormalBlockParams
1959 {
1960 int weightGridWidth;
1961 int weightGridHeight;
1962 ISEParams weightISEParams;
1963 bool isDualPlane;
1964 uint32_t ccs; //! \note Irrelevant if !isDualPlane.
1965 int numPartitions;
1966 uint32_t colorEndpointModes[4];
1967 // \note Below members are irrelevant if numPartitions == 1.
1968 bool isMultiPartSingleCemMode; //! \note If true, the single CEM is at colorEndpointModes[0].
1969 uint32_t partitionSeed;
1970
NormalBlockParamstcu::astc::__anone0fc7f7b0111::NormalBlockParams1971 NormalBlockParams(void)
1972 : weightGridWidth(-1)
1973 , weightGridHeight(-1)
1974 , weightISEParams(ISEMODE_LAST, -1)
1975 , isDualPlane(true)
1976 , ccs((uint32_t)-1)
1977 , numPartitions(-1)
1978 , isMultiPartSingleCemMode(false)
1979 , partitionSeed((uint32_t)-1)
1980 {
1981 colorEndpointModes[0] = 0;
1982 colorEndpointModes[1] = 0;
1983 colorEndpointModes[2] = 0;
1984 colorEndpointModes[3] = 0;
1985 }
1986 };
1987
1988 struct NormalBlockISEInputs
1989 {
1990 ISEInput weight;
1991 ISEInput endpoint;
1992
NormalBlockISEInputstcu::astc::__anone0fc7f7b0111::NormalBlockISEInputs1993 NormalBlockISEInputs(void) : weight(), endpoint()
1994 {
1995 }
1996 };
1997
computeNumWeights(const NormalBlockParams & params)1998 static inline int computeNumWeights(const NormalBlockParams ¶ms)
1999 {
2000 return params.weightGridWidth * params.weightGridHeight * (params.isDualPlane ? 2 : 1);
2001 }
2002
computeNumBitsForColorEndpoints(const NormalBlockParams & params)2003 static inline int computeNumBitsForColorEndpoints(const NormalBlockParams ¶ms)
2004 {
2005 const int numWeightBits = computeNumRequiredBits(params.weightISEParams, computeNumWeights(params));
2006 const int numConfigDataBits = (params.numPartitions == 1 ? 17 :
2007 params.isMultiPartSingleCemMode ? 29 :
2008 25 + 3 * params.numPartitions) +
2009 (params.isDualPlane ? 2 : 0);
2010
2011 return 128 - numWeightBits - numConfigDataBits;
2012 }
2013
computeNumColorEndpointValues(const uint32_t * endpointModes,int numPartitions,bool isMultiPartSingleCemMode)2014 static inline int computeNumColorEndpointValues(const uint32_t *endpointModes, int numPartitions,
2015 bool isMultiPartSingleCemMode)
2016 {
2017 if (isMultiPartSingleCemMode)
2018 return numPartitions * computeNumColorEndpointValues(endpointModes[0]);
2019 else
2020 {
2021 int result = 0;
2022 for (int i = 0; i < numPartitions; i++)
2023 result += computeNumColorEndpointValues(endpointModes[i]);
2024 return result;
2025 }
2026 }
2027
isValidBlockParams(const NormalBlockParams & params,int blockWidth,int blockHeight)2028 static inline bool isValidBlockParams(const NormalBlockParams ¶ms, int blockWidth, int blockHeight)
2029 {
2030 const int numWeights = computeNumWeights(params);
2031 const int numWeightBits = computeNumRequiredBits(params.weightISEParams, numWeights);
2032 const int numColorEndpointValues = computeNumColorEndpointValues(
2033 ¶ms.colorEndpointModes[0], params.numPartitions, params.isMultiPartSingleCemMode);
2034 const int numBitsForColorEndpoints = computeNumBitsForColorEndpoints(params);
2035
2036 return numWeights <= 64 && de::inRange(numWeightBits, 24, 96) && params.weightGridWidth <= blockWidth &&
2037 params.weightGridHeight <= blockHeight && !(params.numPartitions == 4 && params.isDualPlane) &&
2038 numColorEndpointValues <= 18 && numBitsForColorEndpoints >= deDivRoundUp32(13 * numColorEndpointValues, 5);
2039 }
2040
// Write bits 0 to 10 of an ASTC block (the block mode).
//
// The block mode is assembled from:
//  - d: dual-plane flag,
//  - r, h: selected by the weight ISE range maximum (see the first switch),
//  - a, b: offsets of the weight grid dimensions from the constant terms of the
//    matching row of the block mode layout table.
// Which of the eleven bits receives which of these values depends on the layout row.
static void writeBlockMode(AssignBlock128 &dst, const NormalBlockParams &blockParams)
{
    const uint32_t d = blockParams.isDualPlane != 0;
    // r and h initialized in switch below.
    uint32_t r;
    uint32_t h;
    // a, b and blockModeLayoutNdx initialized in block mode layout index detecting loop below.
    // \note a and b stay at (uint32_t)-1 for layouts that have no such variable.
    uint32_t a = (uint32_t)-1;
    uint32_t b = (uint32_t)-1;
    int blockModeLayoutNdx;

    // Find the values of r and h (ISE range).
    // The case labels are the range maximums as returned by computeISERangeMax();
    // the six smallest ranges use h = 0, the six largest use h = 1.
    switch (computeISERangeMax(blockParams.weightISEParams))
    {
    case 1:
        r = 2;
        h = 0;
        break;
    case 2:
        r = 3;
        h = 0;
        break;
    case 3:
        r = 4;
        h = 0;
        break;
    case 4:
        r = 5;
        h = 0;
        break;
    case 5:
        r = 6;
        h = 0;
        break;
    case 7:
        r = 7;
        h = 0;
        break;

    case 9:
        r = 2;
        h = 1;
        break;
    case 11:
        r = 3;
        h = 1;
        break;
    case 15:
        r = 4;
        h = 1;
        break;
    case 19:
        r = 5;
        h = 1;
        break;
    case 23:
        r = 6;
        h = 1;
        break;
    case 31:
        r = 7;
        h = 1;
        break;

    default:
        // Unsupported weight range.
        DE_ASSERT(false);
        r = (uint32_t)-1;
        h = (uint32_t)-1;
    }

    // Find block mode layout index, i.e. appropriate row in the "2d block mode layout" table in ASTC spec.

    {
        // Identifies which variable (if any) is added to a grid dimension's constant term.
        // The values double as indices into variableOffsetsMax below.
        enum BlockModeLayoutABVariable
        {
            Z = 0, // No variable term.
            A = 1,
            B = 2
        };

        // One row per block mode layout: grid width = gridWidthConstantTerm + <width variable>,
        // grid height = gridHeightConstantTerm + <height variable>, where a and b are
        // aNumBits-bit and bNumBits-bit values respectively.
        static const struct BlockModeLayout
        {
            int aNumBits;
            int bNumBits;
            BlockModeLayoutABVariable gridWidthVariableTerm;
            int gridWidthConstantTerm;
            BlockModeLayoutABVariable gridHeightVariableTerm;
            int gridHeightConstantTerm;
        } blockModeLayouts[] = {{2, 2, B, 4, A, 2}, {2, 2, B, 8, A, 2}, {2, 2, A, 2, B, 8}, {2, 1, A, 2, B, 6},
                                {2, 1, B, 2, A, 2}, {2, 0, Z, 12, A, 2}, {2, 0, A, 2, Z, 12}, {0, 0, Z, 6, Z, 10},
                                {0, 0, Z, 10, Z, 6}, {2, 2, A, 6, B, 6}};

        // Pick the first layout whose representable width and height ranges contain the requested grid size.
        // If none matches, blockModeLayoutNdx ends up one past the last row and the switch below asserts.
        for (blockModeLayoutNdx = 0; blockModeLayoutNdx < DE_LENGTH_OF_ARRAY(blockModeLayouts); blockModeLayoutNdx++)
        {
            const BlockModeLayout &layout   = blockModeLayouts[blockModeLayoutNdx];
            const int aMax                  = (1 << layout.aNumBits) - 1;
            const int bMax                  = (1 << layout.bNumBits) - 1;
            const int variableOffsetsMax[3] = {0, aMax, bMax};
            const int widthMin              = layout.gridWidthConstantTerm;
            const int heightMin             = layout.gridHeightConstantTerm;
            const int widthMax              = widthMin + variableOffsetsMax[layout.gridWidthVariableTerm];
            const int heightMax             = heightMin + variableOffsetsMax[layout.gridHeightVariableTerm];

            // Width and height must not both depend on the same variable.
            DE_ASSERT(layout.gridWidthVariableTerm != layout.gridHeightVariableTerm ||
                      layout.gridWidthVariableTerm == Z);

            if (de::inRange(blockParams.weightGridWidth, widthMin, widthMax) &&
                de::inRange(blockParams.weightGridHeight, heightMin, heightMax))
            {
                // Bind each dimension to the variable it is encoded in; dimensions with no
                // variable term (Z) are bound to a scratch variable so the write is harmless.
                uint32_t defaultvalue    = 0;
                uint32_t &widthVariable  = layout.gridWidthVariableTerm == A ? a :
                                           layout.gridWidthVariableTerm == B ? b :
                                                                               defaultvalue;
                uint32_t &heightVariable = layout.gridHeightVariableTerm == A ? a :
                                           layout.gridHeightVariableTerm == B ? b :
                                                                                defaultvalue;

                widthVariable  = blockParams.weightGridWidth - layout.gridWidthConstantTerm;
                heightVariable = blockParams.weightGridHeight - layout.gridHeightConstantTerm;

                break;
            }
        }
    }

    // Set block mode bits.

    const uint32_t a0 = getBit(a, 0);
    const uint32_t a1 = getBit(a, 1);
    const uint32_t b0 = getBit(b, 0);
    const uint32_t b1 = getBit(b, 1);
    const uint32_t r0 = getBit(r, 0);
    const uint32_t r1 = getBit(r, 1);
    const uint32_t r2 = getBit(r, 2);

    // ASSIGN_BITS takes the values of block bits 10 down to 0, in that order.
#define SB(NDX, VAL) dst.setBit((NDX), (VAL))
#define ASSIGN_BITS(B10, B9, B8, B7, B6, B5, B4, B3, B2, B1, B0) \
    do                                                           \
    {                                                            \
        SB(10, (B10));                                           \
        SB(9, (B9));                                             \
        SB(8, (B8));                                             \
        SB(7, (B7));                                             \
        SB(6, (B6));                                             \
        SB(5, (B5));                                             \
        SB(4, (B4));                                             \
        SB(3, (B3));                                             \
        SB(2, (B2));                                             \
        SB(1, (B1));                                             \
        SB(0, (B0));                                             \
    } while (false)

    // Each case corresponds to the row with the same index in blockModeLayouts.
    switch (blockModeLayoutNdx)
    {
    case 0:
        ASSIGN_BITS(d, h, b1, b0, a1, a0, r0, 0, 0, r2, r1);
        break;
    case 1:
        ASSIGN_BITS(d, h, b1, b0, a1, a0, r0, 0, 1, r2, r1);
        break;
    case 2:
        ASSIGN_BITS(d, h, b1, b0, a1, a0, r0, 1, 0, r2, r1);
        break;
    case 3:
        // \note b is a single-bit value in this layout (bNumBits == 1), so it is written as such.
        ASSIGN_BITS(d, h, 0, b, a1, a0, r0, 1, 1, r2, r1);
        break;
    case 4:
        // \note b is a single-bit value in this layout (bNumBits == 1), so it is written as such.
        ASSIGN_BITS(d, h, 1, b, a1, a0, r0, 1, 1, r2, r1);
        break;
    case 5:
        ASSIGN_BITS(d, h, 0, 0, a1, a0, r0, r2, r1, 0, 0);
        break;
    case 6:
        ASSIGN_BITS(d, h, 0, 1, a1, a0, r0, r2, r1, 0, 0);
        break;
    case 7:
        ASSIGN_BITS(d, h, 1, 1, 0, 0, r0, r2, r1, 0, 0);
        break;
    case 8:
        ASSIGN_BITS(d, h, 1, 1, 0, 1, r0, r2, r1, 0, 0);
        break;
    case 9:
        // \note Bits 10 and 9 carry b1 and b0 here, so d and h cannot be encoded and must be zero.
        ASSIGN_BITS(b1, b0, 1, 0, a1, a0, r0, r2, r1, 0, 0);
        DE_ASSERT(d == 0 && h == 0);
        break;
    default:
        // No layout matched the requested weight grid size.
        DE_ASSERT(false);
    }

#undef ASSIGN_BITS
#undef SB
}
2234
2235 // Write color endpoint mode data of an ASTC block.
writeColorEndpointModes(AssignBlock128 & dst,const uint32_t * colorEndpointModes,bool isMultiPartSingleCemMode,int numPartitions,int extraCemBitsStart)2236 static void writeColorEndpointModes(AssignBlock128 &dst, const uint32_t *colorEndpointModes,
2237 bool isMultiPartSingleCemMode, int numPartitions, int extraCemBitsStart)
2238 {
2239 if (numPartitions == 1)
2240 dst.setBits(13, 16, colorEndpointModes[0]);
2241 else
2242 {
2243 if (isMultiPartSingleCemMode)
2244 {
2245 dst.setBits(23, 24, 0);
2246 dst.setBits(25, 28, colorEndpointModes[0]);
2247 }
2248 else
2249 {
2250 DE_ASSERT(numPartitions > 0);
2251 const uint32_t minCem = *std::min_element(&colorEndpointModes[0], &colorEndpointModes[numPartitions]);
2252 const uint32_t maxCem = *std::max_element(&colorEndpointModes[0], &colorEndpointModes[numPartitions]);
2253 const uint32_t minCemClass = minCem / 4;
2254 const uint32_t maxCemClass = maxCem / 4;
2255 DE_ASSERT(maxCemClass - minCemClass <= 1);
2256 DE_UNREF(minCemClass); // \note For non-debug builds.
2257 const uint32_t highLevelSelector = de::max(1u, maxCemClass);
2258
2259 dst.setBits(23, 24, highLevelSelector);
2260
2261 for (int partNdx = 0; partNdx < numPartitions; partNdx++)
2262 {
2263 const uint32_t c = colorEndpointModes[partNdx] / 4 == highLevelSelector ? 1 : 0;
2264 const uint32_t m = colorEndpointModes[partNdx] % 4;
2265 const uint32_t lowMBit0Ndx = numPartitions + 2 * partNdx;
2266 const uint32_t lowMBit1Ndx = numPartitions + 2 * partNdx + 1;
2267 dst.setBit(25 + partNdx, c);
2268 dst.setBit(lowMBit0Ndx < 4 ? 25 + lowMBit0Ndx : extraCemBitsStart + lowMBit0Ndx - 4, getBit(m, 0));
2269 dst.setBit(lowMBit1Ndx < 4 ? 25 + lowMBit1Ndx : extraCemBitsStart + lowMBit1Ndx - 4, getBit(m, 1));
2270 }
2271 }
2272 }
2273 }
2274
encodeISETritBlock(BitAssignAccessStream & dst,int numBits,bool fromExplicitInputBlock,const ISEInput::Block & blockInput,const uint32_t * nonBlockInput,int numValues)2275 static void encodeISETritBlock(BitAssignAccessStream &dst, int numBits, bool fromExplicitInputBlock,
2276 const ISEInput::Block &blockInput, const uint32_t *nonBlockInput, int numValues)
2277 {
2278 // tritBlockTValue[t0][t1][t2][t3][t4] is a value of T (not necessarily the only one) that will yield the given trits when decoded.
2279 static const uint32_t tritBlockTValue[3][3][3][3][3] = {{{{{0, 128, 96}, {32, 160, 224}, {64, 192, 28}},
2280 {{16, 144, 112}, {48, 176, 240}, {80, 208, 156}},
2281 {{3, 131, 99}, {35, 163, 227}, {67, 195, 31}}},
2282 {{{4, 132, 100}, {36, 164, 228}, {68, 196, 60}},
2283 {{20, 148, 116}, {52, 180, 244}, {84, 212, 188}},
2284 {{19, 147, 115}, {51, 179, 243}, {83, 211, 159}}},
2285 {{{8, 136, 104}, {40, 168, 232}, {72, 200, 92}},
2286 {{24, 152, 120}, {56, 184, 248}, {88, 216, 220}},
2287 {{12, 140, 108}, {44, 172, 236}, {76, 204, 124}}}},
2288 {{{{1, 129, 97}, {33, 161, 225}, {65, 193, 29}},
2289 {{17, 145, 113}, {49, 177, 241}, {81, 209, 157}},
2290 {{7, 135, 103}, {39, 167, 231}, {71, 199, 63}}},
2291 {{{5, 133, 101}, {37, 165, 229}, {69, 197, 61}},
2292 {{21, 149, 117}, {53, 181, 245}, {85, 213, 189}},
2293 {{23, 151, 119}, {55, 183, 247}, {87, 215, 191}}},
2294 {{{9, 137, 105}, {41, 169, 233}, {73, 201, 93}},
2295 {{25, 153, 121}, {57, 185, 249}, {89, 217, 221}},
2296 {{13, 141, 109}, {45, 173, 237}, {77, 205, 125}}}},
2297 {{{{2, 130, 98}, {34, 162, 226}, {66, 194, 30}},
2298 {{18, 146, 114}, {50, 178, 242}, {82, 210, 158}},
2299 {{11, 139, 107}, {43, 171, 235}, {75, 203, 95}}},
2300 {{{6, 134, 102}, {38, 166, 230}, {70, 198, 62}},
2301 {{22, 150, 118}, {54, 182, 246}, {86, 214, 190}},
2302 {{27, 155, 123}, {59, 187, 251}, {91, 219, 223}}},
2303 {{{10, 138, 106}, {42, 170, 234}, {74, 202, 94}},
2304 {{26, 154, 122}, {58, 186, 250}, {90, 218, 222}},
2305 {{14, 142, 110}, {46, 174, 238}, {78, 206, 126}}}}};
2306
2307 DE_ASSERT(de::inRange(numValues, 1, 5));
2308
2309 uint32_t tritParts[5];
2310 uint32_t bitParts[5];
2311
2312 for (int i = 0; i < 5; i++)
2313 {
2314 if (i < numValues)
2315 {
2316 if (fromExplicitInputBlock)
2317 {
2318 bitParts[i] = blockInput.bitValues[i];
2319 tritParts[i] = -1; // \note Won't be used, but silences warning.
2320 }
2321 else
2322 {
2323 // \todo [2016-01-20 pyry] numBits = 0 doesn't make sense
2324 bitParts[i] = numBits > 0 ? getBits(nonBlockInput[i], 0, numBits - 1) : 0;
2325 tritParts[i] = nonBlockInput[i] >> numBits;
2326 }
2327 }
2328 else
2329 {
2330 bitParts[i] = 0;
2331 tritParts[i] = 0;
2332 }
2333 }
2334
2335 const uint32_t T = fromExplicitInputBlock ?
2336 blockInput.tOrQValue :
2337 tritBlockTValue[tritParts[0]][tritParts[1]][tritParts[2]][tritParts[3]][tritParts[4]];
2338
2339 dst.setNext(numBits, bitParts[0]);
2340 dst.setNext(2, getBits(T, 0, 1));
2341 dst.setNext(numBits, bitParts[1]);
2342 dst.setNext(2, getBits(T, 2, 3));
2343 dst.setNext(numBits, bitParts[2]);
2344 dst.setNext(1, getBit(T, 4));
2345 dst.setNext(numBits, bitParts[3]);
2346 dst.setNext(2, getBits(T, 5, 6));
2347 dst.setNext(numBits, bitParts[4]);
2348 dst.setNext(1, getBit(T, 7));
2349 }
2350
encodeISEQuintBlock(BitAssignAccessStream & dst,int numBits,bool fromExplicitInputBlock,const ISEInput::Block & blockInput,const uint32_t * nonBlockInput,int numValues)2351 static void encodeISEQuintBlock(BitAssignAccessStream &dst, int numBits, bool fromExplicitInputBlock,
2352 const ISEInput::Block &blockInput, const uint32_t *nonBlockInput, int numValues)
2353 {
2354 // quintBlockQValue[q0][q1][q2] is a value of Q (not necessarily the only one) that will yield the given quints when decoded.
2355 static const uint32_t quintBlockQValue[5][5][5] = {{{0, 32, 64, 96, 102},
2356 {8, 40, 72, 104, 110},
2357 {16, 48, 80, 112, 118},
2358 {24, 56, 88, 120, 126},
2359 {5, 37, 69, 101, 39}},
2360 {{1, 33, 65, 97, 103},
2361 {9, 41, 73, 105, 111},
2362 {17, 49, 81, 113, 119},
2363 {25, 57, 89, 121, 127},
2364 {13, 45, 77, 109, 47}},
2365 {{2, 34, 66, 98, 70},
2366 {10, 42, 74, 106, 78},
2367 {18, 50, 82, 114, 86},
2368 {26, 58, 90, 122, 94},
2369 {21, 53, 85, 117, 55}},
2370 {{3, 35, 67, 99, 71},
2371 {11, 43, 75, 107, 79},
2372 {19, 51, 83, 115, 87},
2373 {27, 59, 91, 123, 95},
2374 {29, 61, 93, 125, 63}},
2375 {{4, 36, 68, 100, 38},
2376 {12, 44, 76, 108, 46},
2377 {20, 52, 84, 116, 54},
2378 {28, 60, 92, 124, 62},
2379 {6, 14, 22, 30, 7}}};
2380
2381 DE_ASSERT(de::inRange(numValues, 1, 3));
2382
2383 uint32_t quintParts[3];
2384 uint32_t bitParts[3];
2385
2386 for (int i = 0; i < 3; i++)
2387 {
2388 if (i < numValues)
2389 {
2390 if (fromExplicitInputBlock)
2391 {
2392 bitParts[i] = blockInput.bitValues[i];
2393 quintParts[i] = -1; // \note Won't be used, but silences warning.
2394 }
2395 else
2396 {
2397 // \todo [2016-01-20 pyry] numBits = 0 doesn't make sense
2398 bitParts[i] = numBits > 0 ? getBits(nonBlockInput[i], 0, numBits - 1) : 0;
2399 quintParts[i] = nonBlockInput[i] >> numBits;
2400 }
2401 }
2402 else
2403 {
2404 bitParts[i] = 0;
2405 quintParts[i] = 0;
2406 }
2407 }
2408
2409 const uint32_t Q =
2410 fromExplicitInputBlock ? blockInput.tOrQValue : quintBlockQValue[quintParts[0]][quintParts[1]][quintParts[2]];
2411
2412 dst.setNext(numBits, bitParts[0]);
2413 dst.setNext(3, getBits(Q, 0, 2));
2414 dst.setNext(numBits, bitParts[1]);
2415 dst.setNext(2, getBits(Q, 3, 4));
2416 dst.setNext(numBits, bitParts[2]);
2417 dst.setNext(2, getBits(Q, 5, 6));
2418 }
2419
encodeISEBitBlock(BitAssignAccessStream & dst,int numBits,uint32_t value)2420 static void encodeISEBitBlock(BitAssignAccessStream &dst, int numBits, uint32_t value)
2421 {
2422 DE_ASSERT(de::inRange(value, 0u, (1u << numBits) - 1));
2423 dst.setNext(numBits, value);
2424 }
2425
encodeISE(BitAssignAccessStream & dst,const ISEParams & params,const ISEInput & input,int numValues)2426 static void encodeISE(BitAssignAccessStream &dst, const ISEParams ¶ms, const ISEInput &input, int numValues)
2427 {
2428 if (params.mode == ISEMODE_TRIT)
2429 {
2430 const int numBlocks = deDivRoundUp32(numValues, 5);
2431 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2432 {
2433 const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 5 * (numBlocks - 1) : 5;
2434 encodeISETritBlock(dst, params.numBits, input.isGivenInBlockForm,
2435 input.isGivenInBlockForm ? input.value.block[blockNdx] : ISEInput::Block(),
2436 input.isGivenInBlockForm ? DE_NULL : &input.value.plain[5 * blockNdx], numValuesInBlock);
2437 }
2438 }
2439 else if (params.mode == ISEMODE_QUINT)
2440 {
2441 const int numBlocks = deDivRoundUp32(numValues, 3);
2442 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2443 {
2444 const int numValuesInBlock = blockNdx == numBlocks - 1 ? numValues - 3 * (numBlocks - 1) : 3;
2445 encodeISEQuintBlock(dst, params.numBits, input.isGivenInBlockForm,
2446 input.isGivenInBlockForm ? input.value.block[blockNdx] : ISEInput::Block(),
2447 input.isGivenInBlockForm ? DE_NULL : &input.value.plain[3 * blockNdx],
2448 numValuesInBlock);
2449 }
2450 }
2451 else
2452 {
2453 DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
2454 for (int i = 0; i < numValues; i++)
2455 encodeISEBitBlock(dst, params.numBits,
2456 input.isGivenInBlockForm ? input.value.block[i].bitValues[0] : input.value.plain[i]);
2457 }
2458 }
2459
writeWeightData(AssignBlock128 & dst,const ISEParams & iseParams,const ISEInput & input,int numWeights)2460 static void writeWeightData(AssignBlock128 &dst, const ISEParams &iseParams, const ISEInput &input, int numWeights)
2461 {
2462 const int numWeightBits = computeNumRequiredBits(iseParams, numWeights);
2463 BitAssignAccessStream access(dst, 127, numWeightBits, false);
2464 encodeISE(access, iseParams, input, numWeights);
2465 }
2466
writeColorEndpointData(AssignBlock128 & dst,const ISEParams & iseParams,const ISEInput & input,int numEndpoints,int numBitsForColorEndpoints,int colorEndpointDataStartNdx)2467 static void writeColorEndpointData(AssignBlock128 &dst, const ISEParams &iseParams, const ISEInput &input,
2468 int numEndpoints, int numBitsForColorEndpoints, int colorEndpointDataStartNdx)
2469 {
2470 BitAssignAccessStream access(dst, colorEndpointDataStartNdx, numBitsForColorEndpoints, true);
2471 encodeISE(access, iseParams, input, numEndpoints);
2472 }
2473
generateNormalBlock(const NormalBlockParams & blockParams,int blockWidth,int blockHeight,const NormalBlockISEInputs & iseInputs)2474 static AssignBlock128 generateNormalBlock(const NormalBlockParams &blockParams, int blockWidth, int blockHeight,
2475 const NormalBlockISEInputs &iseInputs)
2476 {
2477 DE_ASSERT(isValidBlockParams(blockParams, blockWidth, blockHeight));
2478 DE_UNREF(blockWidth); // \note For non-debug builds.
2479 DE_UNREF(blockHeight); // \note For non-debug builds.
2480
2481 AssignBlock128 block;
2482 const int numWeights = computeNumWeights(blockParams);
2483 const int numWeightBits = computeNumRequiredBits(blockParams.weightISEParams, numWeights);
2484
2485 writeBlockMode(block, blockParams);
2486
2487 block.setBits(11, 12, blockParams.numPartitions - 1);
2488 if (blockParams.numPartitions > 1)
2489 block.setBits(13, 22, blockParams.partitionSeed);
2490
2491 {
2492 const int extraCemBitsStart = 127 - numWeightBits -
2493 (blockParams.numPartitions == 1 || blockParams.isMultiPartSingleCemMode ? -1 :
2494 blockParams.numPartitions == 4 ? 7 :
2495 blockParams.numPartitions == 3 ? 4 :
2496 blockParams.numPartitions == 2 ? 1 :
2497 0);
2498
2499 writeColorEndpointModes(block, &blockParams.colorEndpointModes[0], blockParams.isMultiPartSingleCemMode,
2500 blockParams.numPartitions, extraCemBitsStart);
2501
2502 if (blockParams.isDualPlane)
2503 block.setBits(extraCemBitsStart - 2, extraCemBitsStart - 1, blockParams.ccs);
2504 }
2505
2506 writeWeightData(block, blockParams.weightISEParams, iseInputs.weight, numWeights);
2507
2508 {
2509 const int numColorEndpointValues = computeNumColorEndpointValues(
2510 &blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2511 const int numBitsForColorEndpoints = computeNumBitsForColorEndpoints(blockParams);
2512 const int colorEndpointDataStartNdx = blockParams.numPartitions == 1 ? 17 : 29;
2513 const ISEParams &colorEndpointISEParams =
2514 computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues);
2515
2516 writeColorEndpointData(block, colorEndpointISEParams, iseInputs.endpoint, numColorEndpointValues,
2517 numBitsForColorEndpoints, colorEndpointDataStartNdx);
2518 }
2519
2520 return block;
2521 }
2522
2523 // Generate default ISE inputs for weight and endpoint data - gradient-ish values.
generateDefaultISEInputs(const NormalBlockParams & blockParams)2524 static NormalBlockISEInputs generateDefaultISEInputs(const NormalBlockParams &blockParams)
2525 {
2526 NormalBlockISEInputs result;
2527
2528 {
2529 result.weight.isGivenInBlockForm = false;
2530
2531 const int numWeights = computeNumWeights(blockParams);
2532 const int weightRangeMax = computeISERangeMax(blockParams.weightISEParams);
2533
2534 if (blockParams.isDualPlane)
2535 {
2536 for (int i = 0; i < numWeights; i += 2)
2537 result.weight.value.plain[i] = (i * weightRangeMax + (numWeights - 1) / 2) / (numWeights - 1);
2538
2539 for (int i = 1; i < numWeights; i += 2)
2540 result.weight.value.plain[i] =
2541 weightRangeMax - (i * weightRangeMax + (numWeights - 1) / 2) / (numWeights - 1);
2542 }
2543 else
2544 {
2545 for (int i = 0; i < numWeights; i++)
2546 result.weight.value.plain[i] = (i * weightRangeMax + (numWeights - 1) / 2) / (numWeights - 1);
2547 }
2548 }
2549
2550 {
2551 result.endpoint.isGivenInBlockForm = false;
2552
2553 const int numColorEndpointValues = computeNumColorEndpointValues(
2554 &blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2555 const int numBitsForColorEndpoints = computeNumBitsForColorEndpoints(blockParams);
2556 const ISEParams &colorEndpointISEParams =
2557 computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues);
2558 const int colorEndpointRangeMax = computeISERangeMax(colorEndpointISEParams);
2559
2560 for (int i = 0; i < numColorEndpointValues; i++)
2561 result.endpoint.value.plain[i] =
2562 (i * colorEndpointRangeMax + (numColorEndpointValues - 1) / 2) / (numColorEndpointValues - 1);
2563 }
2564
2565 return result;
2566 }
2567
// Weight ISE parameter sets from which generateRandomBlock() picks one by random index.
// Listed in ascending order of range maximum as computed by computeISERangeMax():
// 1, 2, 3, 4, 5, 7, 9, 11, 15, 19, 23, 31 - the same twelve ranges writeBlockMode() can encode.
static const ISEParams s_weightISEParamsCandidates[] = {
    ISEParams(ISEMODE_PLAIN_BIT, 1), ISEParams(ISEMODE_TRIT, 0),  ISEParams(ISEMODE_PLAIN_BIT, 2),
    ISEParams(ISEMODE_QUINT, 0),     ISEParams(ISEMODE_TRIT, 1),  ISEParams(ISEMODE_PLAIN_BIT, 3),
    ISEParams(ISEMODE_QUINT, 1),     ISEParams(ISEMODE_TRIT, 2),  ISEParams(ISEMODE_PLAIN_BIT, 4),
    ISEParams(ISEMODE_QUINT, 2),     ISEParams(ISEMODE_TRIT, 3),  ISEParams(ISEMODE_PLAIN_BIT, 5)};
2573
// Generate one random, valid ASTC block and write its bytes to dst.
//
// \param dst       Destination memory; receives the serialized block via AssignBlock128::assignToMemory().
// \param blockSize Block dimensions; only 2D blocks (z == 1) are supported (asserted).
// \param rnd       Random number source. The generated block depends on the exact sequence of
//                  calls made to it, so the order of the rnd calls below must not be changed.
//
// When rnd.getFloat() yields less than 0.1 a void-extent block is generated, otherwise a normal block.
void generateRandomBlock(uint8_t *dst, const IVec3 &blockSize, de::Random &rnd)
{
    DE_ASSERT(blockSize.z() == 1);

    if (rnd.getFloat() < 0.1f)
    {
        // Void extent block.
        // HDR blocks get fp16 encodings of floats drawn with getFloat(0.0f, 1.0f);
        // LDR blocks get arbitrary 16-bit values.
        const bool isVoidExtentHDR = rnd.getBool();
        const uint16_t r = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
        const uint16_t g = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
        const uint16_t b = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
        const uint16_t a = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (uint16_t)rnd.getInt(0, 0xffff);
        generateVoidExtentBlock(VoidExtentParams(isVoidExtentHDR, r, g, b, a)).assignToMemory(dst);
    }
    else
    {
        // Not void extent block.

        // Generate block params.
        // Rejection sampling: draw random parameters until they form a valid combination.

        NormalBlockParams blockParams;

        do
        {
            blockParams.weightGridWidth  = rnd.getInt(2, blockSize.x());
            blockParams.weightGridHeight = rnd.getInt(2, blockSize.y());
            blockParams.weightISEParams =
                s_weightISEParamsCandidates[rnd.getInt(0, DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates) - 1)];
            blockParams.numPartitions            = rnd.getInt(1, 4);
            blockParams.isMultiPartSingleCemMode = rnd.getFloat() < 0.25f;
            // \note Dual-plane is never combined with four partitions; rnd.getBool() is not
            //       even called in that case (short-circuit).
            blockParams.isDualPlane   = blockParams.numPartitions != 4 && rnd.getBool();
            blockParams.ccs           = rnd.getInt(0, 3);
            blockParams.partitionSeed = rnd.getInt(0, 1023);

            blockParams.colorEndpointModes[0] = rnd.getInt(0, 15);

            {
                // Direction in which the other partitions' CEMs may differ from the first one:
                // 0 in single-CEM mode, forced upwards/downwards at the ends of the CEM range
                // (0 and 15), random otherwise.
                const int cemDiff = blockParams.isMultiPartSingleCemMode   ? 0 :
                                    blockParams.colorEndpointModes[0] == 0  ? 1 :
                                    blockParams.colorEndpointModes[0] == 15 ? -1 :
                                    rnd.getBool()                           ? 1 :
                                                                              -1;

                // Every further partition uses either the first CEM or its neighbor in direction cemDiff.
                for (int i = 1; i < blockParams.numPartitions; i++)
                    blockParams.colorEndpointModes[i] =
                        blockParams.colorEndpointModes[0] + (cemDiff == -1 ? rnd.getInt(-1, 0) :
                                                             cemDiff == 1  ? rnd.getInt(0, 1) :
                                                                             0);
            }
        } while (!isValidBlockParams(blockParams, blockSize.x(), blockSize.y()));

        // Generate ISE inputs for both weight and endpoint data.

        NormalBlockISEInputs iseInputs;

        // Iteration 0 fills the weights, iteration 1 the color endpoints.
        for (int weightOrEndpoints = 0; weightOrEndpoints <= 1; weightOrEndpoints++)
        {
            const bool setWeights = weightOrEndpoints == 0;
            const int numValues   = setWeights ? computeNumWeights(blockParams) :
                                                 computeNumColorEndpointValues(&blockParams.colorEndpointModes[0],
                                                                               blockParams.numPartitions,
                                                                               blockParams.isMultiPartSingleCemMode);
            const ISEParams iseParams =
                setWeights ? blockParams.weightISEParams :
                             computeMaximumRangeISEParams(computeNumBitsForColorEndpoints(blockParams), numValues);
            ISEInput &iseInput = setWeights ? iseInputs.weight : iseInputs.endpoint;

            // Randomly choose between explicit block form and plain values.
            iseInput.isGivenInBlockForm = rnd.getBool();

            if (iseInput.isGivenInBlockForm)
            {
                const int numValuesPerISEBlock = iseParams.mode == ISEMODE_TRIT  ? 5 :
                                                 iseParams.mode == ISEMODE_QUINT ? 3 :
                                                                                   1;
                const int iseBitMax            = (1 << iseParams.numBits) - 1;
                const int numISEBlocks         = deDivRoundUp32(numValues, numValuesPerISEBlock);

                for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocks; iseBlockNdx++)
                {
                    // \note tOrQValue is always drawn from the full 8-bit range, whatever the ISE mode.
                    iseInput.value.block[iseBlockNdx].tOrQValue = rnd.getInt(0, 255);
                    for (int i = 0; i < numValuesPerISEBlock; i++)
                        iseInput.value.block[iseBlockNdx].bitValues[i] = rnd.getInt(0, iseBitMax);
                }
            }
            else
            {
                // Plain form: any value within the ISE range.
                const int rangeMax = computeISERangeMax(iseParams);

                for (int valueNdx = 0; valueNdx < numValues; valueNdx++)
                    iseInput.value.plain[valueNdx] = rnd.getInt(0, rangeMax);
            }
        }

        generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).assignToMemory(dst);
    }
}
2670
2671 } // namespace
2672
2673 // Generate block data for a given BlockTestType and format.
generateBlockCaseTestData(vector<uint8_t> & dst,CompressedTexFormat format,BlockTestType testType)2674 void generateBlockCaseTestData(vector<uint8_t> &dst, CompressedTexFormat format, BlockTestType testType)
2675 {
2676 DE_ASSERT(isAstcFormat(format));
2677 DE_ASSERT(!(isAstcSRGBFormat(format) && isBlockTestTypeHDROnly(testType)));
2678
2679 const IVec3 blockSize = getBlockPixelSize(format);
2680 DE_ASSERT(blockSize.z() == 1);
2681
2682 switch (testType)
2683 {
2684 case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
2685 // Generate a gradient-like set of LDR void-extent blocks.
2686 {
2687 const int numBlocks = 1 << 13;
2688 const uint32_t numValues = 1 << 16;
2689 dst.reserve(numBlocks * BLOCK_SIZE_BYTES);
2690
2691 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2692 {
2693 const uint32_t baseValue = blockNdx * (numValues - 1) / (numBlocks - 1);
2694 const uint16_t r = (uint16_t)((baseValue + numValues * 0 / 4) % numValues);
2695 const uint16_t g = (uint16_t)((baseValue + numValues * 1 / 4) % numValues);
2696 const uint16_t b = (uint16_t)((baseValue + numValues * 2 / 4) % numValues);
2697 const uint16_t a = (uint16_t)((baseValue + numValues * 3 / 4) % numValues);
2698 AssignBlock128 block;
2699
2700 generateVoidExtentBlock(VoidExtentParams(false, r, g, b, a)).pushBytesToVector(dst);
2701 }
2702
2703 break;
2704 }
2705
2706 case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
2707 // Generate a gradient-like set of HDR void-extent blocks, with values ranging from the largest finite negative to largest finite positive of fp16.
2708 {
2709 const float minValue = -65504.0f;
2710 const float maxValue = +65504.0f;
2711 const int numBlocks = 1 << 13;
2712 dst.reserve(numBlocks * BLOCK_SIZE_BYTES);
2713
2714 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2715 {
2716 const int rNdx = (blockNdx + numBlocks * 0 / 4) % numBlocks;
2717 const int gNdx = (blockNdx + numBlocks * 1 / 4) % numBlocks;
2718 const int bNdx = (blockNdx + numBlocks * 2 / 4) % numBlocks;
2719 const int aNdx = (blockNdx + numBlocks * 3 / 4) % numBlocks;
2720 const deFloat16 r =
2721 deFloat32To16(minValue + (float)rNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2722 const deFloat16 g =
2723 deFloat32To16(minValue + (float)gNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2724 const deFloat16 b =
2725 deFloat32To16(minValue + (float)bNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2726 const deFloat16 a =
2727 deFloat32To16(minValue + (float)aNdx * (maxValue - minValue) / (float)(numBlocks - 1));
2728
2729 generateVoidExtentBlock(VoidExtentParams(true, r, g, b, a)).pushBytesToVector(dst);
2730 }
2731
2732 break;
2733 }
2734
2735 case BLOCK_TEST_TYPE_WEIGHT_GRID:
2736 // Generate different combinations of plane count, weight ISE params, and grid size.
2737 {
2738 for (int isDualPlane = 0; isDualPlane <= 1; isDualPlane++)
2739 for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);
2740 iseParamsNdx++)
2741 for (int weightGridWidth = 2; weightGridWidth <= 12; weightGridWidth++)
2742 for (int weightGridHeight = 2; weightGridHeight <= 12; weightGridHeight++)
2743 {
2744 NormalBlockParams blockParams;
2745 NormalBlockISEInputs iseInputs;
2746
2747 blockParams.weightGridWidth = weightGridWidth;
2748 blockParams.weightGridHeight = weightGridHeight;
2749 blockParams.isDualPlane = isDualPlane != 0;
2750 blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2751 blockParams.ccs = 0;
2752 blockParams.numPartitions = 1;
2753 blockParams.colorEndpointModes[0] = 0;
2754
2755 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2756 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2757 generateDefaultISEInputs(blockParams))
2758 .pushBytesToVector(dst);
2759 }
2760
2761 break;
2762 }
2763
2764 case BLOCK_TEST_TYPE_WEIGHT_ISE:
2765 // For each weight ISE param set, generate blocks that cover:
2766 // - each single value of the ISE's range, at each position inside an ISE block
2767 // - for trit and quint ISEs, each single T or Q value of an ISE block
2768 {
2769 for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2770 {
2771 const ISEParams &iseParams = s_weightISEParamsCandidates[iseParamsNdx];
2772 NormalBlockParams blockParams;
2773
2774 blockParams.weightGridWidth = 4;
2775 blockParams.weightGridHeight = 4;
2776 blockParams.weightISEParams = iseParams;
2777 blockParams.numPartitions = 1;
2778 blockParams.isDualPlane =
2779 blockParams.weightGridWidth * blockParams.weightGridHeight < 24 ? true : false;
2780 blockParams.ccs = 0;
2781 blockParams.colorEndpointModes[0] = 0;
2782
2783 while (!isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2784 {
2785 blockParams.weightGridWidth--;
2786 blockParams.weightGridHeight--;
2787 }
2788
2789 const int numValuesInISEBlock = iseParams.mode == ISEMODE_TRIT ? 5 :
2790 iseParams.mode == ISEMODE_QUINT ? 3 :
2791 1;
2792 const int numWeights = computeNumWeights(blockParams);
2793
2794 {
2795 const int numWeightValues = (int)computeISERangeMax(iseParams) + 1;
2796 const int numBlocks = deDivRoundUp32(numWeightValues, numWeights);
2797 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2798 iseInputs.weight.isGivenInBlockForm = false;
2799
2800 for (int offset = 0; offset < numValuesInISEBlock; offset++)
2801 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2802 {
2803 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2804 iseInputs.weight.value.plain[weightNdx] =
2805 (blockNdx * numWeights + weightNdx + offset) % numWeightValues;
2806
2807 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs)
2808 .pushBytesToVector(dst);
2809 }
2810 }
2811
2812 if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
2813 {
2814 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2815 iseInputs.weight.isGivenInBlockForm = true;
2816
2817 const int numTQValues = 1 << (iseParams.mode == ISEMODE_TRIT ? 8 : 7);
2818 const int numISEBlocksPerBlock = deDivRoundUp32(numWeights, numValuesInISEBlock);
2819 const int numBlocks = deDivRoundUp32(numTQValues, numISEBlocksPerBlock);
2820
2821 for (int offset = 0; offset < numValuesInISEBlock; offset++)
2822 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2823 {
2824 for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocksPerBlock; iseBlockNdx++)
2825 {
2826 for (int i = 0; i < numValuesInISEBlock; i++)
2827 iseInputs.weight.value.block[iseBlockNdx].bitValues[i] = 0;
2828 iseInputs.weight.value.block[iseBlockNdx].tOrQValue =
2829 (blockNdx * numISEBlocksPerBlock + iseBlockNdx + offset) % numTQValues;
2830 }
2831
2832 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs)
2833 .pushBytesToVector(dst);
2834 }
2835 }
2836 }
2837
2838 break;
2839 }
2840
2841 case BLOCK_TEST_TYPE_CEMS:
2842 // For each plane count & partition count combination, generate all color endpoint mode combinations.
2843 {
2844 for (int isDualPlane = 0; isDualPlane <= 1; isDualPlane++)
2845 for (int numPartitions = 1; numPartitions <= (isDualPlane != 0 ? 3 : 4); numPartitions++)
2846 {
2847 // Multi-partition, single-CEM mode.
2848 if (numPartitions > 1)
2849 {
2850 for (uint32_t singleCem = 0; singleCem < 16; singleCem++)
2851 {
2852 NormalBlockParams blockParams;
2853 blockParams.weightGridWidth = 4;
2854 blockParams.weightGridHeight = 4;
2855 blockParams.isDualPlane = isDualPlane != 0;
2856 blockParams.ccs = 0;
2857 blockParams.numPartitions = numPartitions;
2858 blockParams.isMultiPartSingleCemMode = true;
2859 blockParams.colorEndpointModes[0] = singleCem;
2860 blockParams.partitionSeed = 634;
2861
2862 for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);
2863 iseParamsNdx++)
2864 {
2865 blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2866 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2867 {
2868 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2869 generateDefaultISEInputs(blockParams))
2870 .pushBytesToVector(dst);
2871 break;
2872 }
2873 }
2874 }
2875 }
2876
2877 // Separate-CEM mode.
2878 for (uint32_t cem0 = 0; cem0 < 16; cem0++)
2879 for (uint32_t cem1 = 0; cem1 < (numPartitions >= 2 ? 16u : 1u); cem1++)
2880 for (uint32_t cem2 = 0; cem2 < (numPartitions >= 3 ? 16u : 1u); cem2++)
2881 for (uint32_t cem3 = 0; cem3 < (numPartitions >= 4 ? 16u : 1u); cem3++)
2882 {
2883 NormalBlockParams blockParams;
2884 blockParams.weightGridWidth = 4;
2885 blockParams.weightGridHeight = 4;
2886 blockParams.isDualPlane = isDualPlane != 0;
2887 blockParams.ccs = 0;
2888 blockParams.numPartitions = numPartitions;
2889 blockParams.isMultiPartSingleCemMode = false;
2890 blockParams.colorEndpointModes[0] = cem0;
2891 blockParams.colorEndpointModes[1] = cem1;
2892 blockParams.colorEndpointModes[2] = cem2;
2893 blockParams.colorEndpointModes[3] = cem3;
2894 blockParams.partitionSeed = 634;
2895
2896 {
2897 const uint32_t minCem =
2898 *std::min_element(&blockParams.colorEndpointModes[0],
2899 &blockParams.colorEndpointModes[numPartitions]);
2900 const uint32_t maxCem =
2901 *std::max_element(&blockParams.colorEndpointModes[0],
2902 &blockParams.colorEndpointModes[numPartitions]);
2903 const uint32_t minCemClass = minCem / 4;
2904 const uint32_t maxCemClass = maxCem / 4;
2905
2906 if (maxCemClass - minCemClass > 1)
2907 continue;
2908 }
2909
2910 for (int iseParamsNdx = 0;
2911 iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2912 {
2913 blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2914 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2915 {
2916 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2917 generateDefaultISEInputs(blockParams))
2918 .pushBytesToVector(dst);
2919 break;
2920 }
2921 }
2922 }
2923 }
2924
2925 break;
2926 }
2927
2928 case BLOCK_TEST_TYPE_PARTITION_SEED:
2929 // Test all partition seeds ("partition pattern indices").
2930 {
2931 for (int numPartitions = 2; numPartitions <= 4; numPartitions++)
2932 for (uint32_t partitionSeed = 0; partitionSeed < 1 << 10; partitionSeed++)
2933 {
2934 NormalBlockParams blockParams;
2935 blockParams.weightGridWidth = 4;
2936 blockParams.weightGridHeight = 4;
2937 blockParams.weightISEParams = ISEParams(ISEMODE_PLAIN_BIT, 2);
2938 blockParams.isDualPlane = false;
2939 blockParams.numPartitions = numPartitions;
2940 blockParams.isMultiPartSingleCemMode = true;
2941 blockParams.colorEndpointModes[0] = 0;
2942 blockParams.partitionSeed = partitionSeed;
2943
2944 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
2945 generateDefaultISEInputs(blockParams))
2946 .pushBytesToVector(dst);
2947 }
2948
2949 break;
2950 }
2951
2952 // \note Fall-through.
2953 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
2954 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
2955 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
2956 // For each endpoint mode, for each pair of components in the endpoint value, test 10x10 combinations of values for that pair.
2957 // \note Separate modes for HDR and mode 15 due to different color scales and biases.
2958 {
2959 for (uint32_t cem = 0; cem < 16; cem++)
2960 {
2961 const bool isHDRCem = cem == 2 || cem == 3 || cem == 7 || cem == 11 || cem == 14 || cem == 15;
2962
2963 if ((testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR && isHDRCem) ||
2964 (testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15 && (!isHDRCem || cem == 15)) ||
2965 (testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15 && cem != 15))
2966 continue;
2967
2968 NormalBlockParams blockParams;
2969 blockParams.weightGridWidth = 3;
2970 blockParams.weightGridHeight = 4;
2971 blockParams.weightISEParams = ISEParams(ISEMODE_PLAIN_BIT, 2);
2972 blockParams.isDualPlane = false;
2973 blockParams.numPartitions = 1;
2974 blockParams.colorEndpointModes[0] = cem;
2975
2976 {
2977 const int numBitsForEndpoints = computeNumBitsForColorEndpoints(blockParams);
2978 const int numEndpointParts = computeNumColorEndpointValues(cem);
2979 const ISEParams endpointISE = computeMaximumRangeISEParams(numBitsForEndpoints, numEndpointParts);
2980 const int endpointISERangeMax = computeISERangeMax(endpointISE);
2981
2982 for (int endpointPartNdx0 = 0; endpointPartNdx0 < numEndpointParts; endpointPartNdx0++)
2983 for (int endpointPartNdx1 = endpointPartNdx0 + 1; endpointPartNdx1 < numEndpointParts;
2984 endpointPartNdx1++)
2985 {
2986 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2987 const int numEndpointValues = de::min(10, endpointISERangeMax + 1);
2988
2989 for (int endpointValueNdx0 = 0; endpointValueNdx0 < numEndpointValues; endpointValueNdx0++)
2990 for (int endpointValueNdx1 = 0; endpointValueNdx1 < numEndpointValues;
2991 endpointValueNdx1++)
2992 {
2993 const int endpointValue0 =
2994 endpointValueNdx0 * endpointISERangeMax / (numEndpointValues - 1);
2995 const int endpointValue1 =
2996 endpointValueNdx1 * endpointISERangeMax / (numEndpointValues - 1);
2997
2998 iseInputs.endpoint.value.plain[endpointPartNdx0] = endpointValue0;
2999 iseInputs.endpoint.value.plain[endpointPartNdx1] = endpointValue1;
3000
3001 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs)
3002 .pushBytesToVector(dst);
3003 }
3004 }
3005 }
3006 }
3007
3008 break;
3009 }
3010
3011 case BLOCK_TEST_TYPE_ENDPOINT_ISE:
3012 // Similar to BLOCK_TEST_TYPE_WEIGHT_ISE, see above.
3013 {
3014 static const uint32_t endpointRangeMaximums[] = {5, 9, 11, 19, 23, 39, 47, 79, 95, 159, 191};
3015
3016 for (int endpointRangeNdx = 0; endpointRangeNdx < DE_LENGTH_OF_ARRAY(endpointRangeMaximums);
3017 endpointRangeNdx++)
3018 {
3019 bool validCaseGenerated = false;
3020
3021 for (int numPartitions = 1; !validCaseGenerated && numPartitions <= 4; numPartitions++)
3022 for (int isDual = 0; !validCaseGenerated && isDual <= 1; isDual++)
3023 for (int weightISEParamsNdx = 0;
3024 !validCaseGenerated &&
3025 weightISEParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);
3026 weightISEParamsNdx++)
3027 for (int weightGridWidth = 2; !validCaseGenerated && weightGridWidth <= 12;
3028 weightGridWidth++)
3029 for (int weightGridHeight = 2; !validCaseGenerated && weightGridHeight <= 12;
3030 weightGridHeight++)
3031 {
3032 NormalBlockParams blockParams;
3033 blockParams.weightGridWidth = weightGridWidth;
3034 blockParams.weightGridHeight = weightGridHeight;
3035 blockParams.weightISEParams = s_weightISEParamsCandidates[weightISEParamsNdx];
3036 blockParams.isDualPlane = isDual != 0;
3037 blockParams.ccs = 0;
3038 blockParams.numPartitions = numPartitions;
3039 blockParams.isMultiPartSingleCemMode = true;
3040 blockParams.colorEndpointModes[0] = 12;
3041 blockParams.partitionSeed = 634;
3042
3043 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
3044 {
3045 const ISEParams endpointISEParams = computeMaximumRangeISEParams(
3046 computeNumBitsForColorEndpoints(blockParams),
3047 computeNumColorEndpointValues(&blockParams.colorEndpointModes[0],
3048 numPartitions, true));
3049
3050 if (computeISERangeMax(endpointISEParams) ==
3051 endpointRangeMaximums[endpointRangeNdx])
3052 {
3053 validCaseGenerated = true;
3054
3055 const int numColorEndpoints = computeNumColorEndpointValues(
3056 &blockParams.colorEndpointModes[0], numPartitions,
3057 blockParams.isMultiPartSingleCemMode);
3058 const int numValuesInISEBlock = endpointISEParams.mode == ISEMODE_TRIT ? 5 :
3059 endpointISEParams.mode == ISEMODE_QUINT ?
3060 3 :
3061 1;
3062
3063 {
3064 const int numColorEndpointValues =
3065 (int)computeISERangeMax(endpointISEParams) + 1;
3066 const int numBlocks =
3067 deDivRoundUp32(numColorEndpointValues, numColorEndpoints);
3068 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
3069 iseInputs.endpoint.isGivenInBlockForm = false;
3070
3071 for (int offset = 0; offset < numValuesInISEBlock; offset++)
3072 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
3073 {
3074 for (int endpointNdx = 0; endpointNdx < numColorEndpoints;
3075 endpointNdx++)
3076 iseInputs.endpoint.value.plain[endpointNdx] =
3077 (blockNdx * numColorEndpoints + endpointNdx + offset) %
3078 numColorEndpointValues;
3079
3080 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
3081 iseInputs)
3082 .pushBytesToVector(dst);
3083 }
3084 }
3085
3086 if (endpointISEParams.mode == ISEMODE_TRIT ||
3087 endpointISEParams.mode == ISEMODE_QUINT)
3088 {
3089 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
3090 iseInputs.endpoint.isGivenInBlockForm = true;
3091
3092 const int numTQValues =
3093 1 << (endpointISEParams.mode == ISEMODE_TRIT ? 8 : 7);
3094 const int numISEBlocksPerBlock =
3095 deDivRoundUp32(numColorEndpoints, numValuesInISEBlock);
3096 const int numBlocks = deDivRoundUp32(numTQValues, numISEBlocksPerBlock);
3097
3098 for (int offset = 0; offset < numValuesInISEBlock; offset++)
3099 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
3100 {
3101 for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocksPerBlock;
3102 iseBlockNdx++)
3103 {
3104 for (int i = 0; i < numValuesInISEBlock; i++)
3105 iseInputs.endpoint.value.block[iseBlockNdx]
3106 .bitValues[i] = 0;
3107 iseInputs.endpoint.value.block[iseBlockNdx].tOrQValue =
3108 (blockNdx * numISEBlocksPerBlock + iseBlockNdx +
3109 offset) %
3110 numTQValues;
3111 }
3112
3113 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
3114 iseInputs)
3115 .pushBytesToVector(dst);
3116 }
3117 }
3118 }
3119 }
3120 }
3121
3122 DE_ASSERT(validCaseGenerated);
3123 }
3124
3125 break;
3126 }
3127
3128 case BLOCK_TEST_TYPE_CCS:
3129 // For all partition counts, test all values of the CCS (color component selector).
3130 {
3131 for (int numPartitions = 1; numPartitions <= 3; numPartitions++)
3132 for (uint32_t ccs = 0; ccs < 4; ccs++)
3133 {
3134 NormalBlockParams blockParams;
3135 blockParams.weightGridWidth = 3;
3136 blockParams.weightGridHeight = 3;
3137 blockParams.weightISEParams = ISEParams(ISEMODE_PLAIN_BIT, 2);
3138 blockParams.isDualPlane = true;
3139 blockParams.ccs = ccs;
3140 blockParams.numPartitions = numPartitions;
3141 blockParams.isMultiPartSingleCemMode = true;
3142 blockParams.colorEndpointModes[0] = 8;
3143 blockParams.partitionSeed = 634;
3144
3145 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(),
3146 generateDefaultISEInputs(blockParams))
3147 .pushBytesToVector(dst);
3148 }
3149
3150 break;
3151 }
3152
3153 case BLOCK_TEST_TYPE_RANDOM:
3154 // Generate a number of random (including invalid) blocks.
3155 {
3156 const int numBlocks = 16384;
3157 const uint32_t seed = 1;
3158
3159 dst.resize(numBlocks * BLOCK_SIZE_BYTES);
3160
3161 generateRandomBlocks(&dst[0], numBlocks, format, seed);
3162
3163 break;
3164 }
3165
3166 default:
3167 DE_ASSERT(false);
3168 }
3169 }
3170
generateRandomBlocks(uint8_t * dst,size_t numBlocks,CompressedTexFormat format,uint32_t seed)3171 void generateRandomBlocks(uint8_t *dst, size_t numBlocks, CompressedTexFormat format, uint32_t seed)
3172 {
3173 const IVec3 blockSize = getBlockPixelSize(format);
3174 de::Random rnd(seed);
3175 size_t numBlocksGenerated = 0;
3176
3177 DE_ASSERT(isAstcFormat(format));
3178 DE_ASSERT(blockSize.z() == 1);
3179
3180 for (numBlocksGenerated = 0; numBlocksGenerated < numBlocks; numBlocksGenerated++)
3181 {
3182 uint8_t *const curBlockPtr = dst + numBlocksGenerated * BLOCK_SIZE_BYTES;
3183
3184 generateRandomBlock(curBlockPtr, blockSize, rnd);
3185 }
3186 }
3187
generateRandomValidBlocks(uint8_t * dst,size_t numBlocks,CompressedTexFormat format,TexDecompressionParams::AstcMode mode,uint32_t seed)3188 void generateRandomValidBlocks(uint8_t *dst, size_t numBlocks, CompressedTexFormat format,
3189 TexDecompressionParams::AstcMode mode, uint32_t seed)
3190 {
3191 const IVec3 blockSize = getBlockPixelSize(format);
3192 de::Random rnd(seed);
3193 size_t numBlocksGenerated = 0;
3194
3195 DE_ASSERT(isAstcFormat(format));
3196 DE_ASSERT(blockSize.z() == 1);
3197
3198 for (numBlocksGenerated = 0; numBlocksGenerated < numBlocks; numBlocksGenerated++)
3199 {
3200 uint8_t *const curBlockPtr = dst + numBlocksGenerated * BLOCK_SIZE_BYTES;
3201
3202 do
3203 {
3204 generateRandomBlock(curBlockPtr, blockSize, rnd);
3205 } while (!isValidBlock(curBlockPtr, format, mode));
3206 }
3207 }
3208
3209 // Generate a number of trivial blocks to fill unneeded space in a texture.
generateDefaultVoidExtentBlocks(uint8_t * dst,size_t numBlocks)3210 void generateDefaultVoidExtentBlocks(uint8_t *dst, size_t numBlocks)
3211 {
3212 AssignBlock128 block = generateVoidExtentBlock(VoidExtentParams(false, 0, 0, 0, 0));
3213 for (size_t ndx = 0; ndx < numBlocks; ndx++)
3214 block.assignToMemory(&dst[ndx * BLOCK_SIZE_BYTES]);
3215 }
3216
generateDefaultNormalBlocks(uint8_t * dst,size_t numBlocks,int blockWidth,int blockHeight)3217 void generateDefaultNormalBlocks(uint8_t *dst, size_t numBlocks, int blockWidth, int blockHeight)
3218 {
3219 NormalBlockParams blockParams;
3220
3221 blockParams.weightGridWidth = 3;
3222 blockParams.weightGridHeight = 3;
3223 blockParams.weightISEParams = ISEParams(ISEMODE_PLAIN_BIT, 5);
3224 blockParams.isDualPlane = false;
3225 blockParams.numPartitions = 1;
3226 blockParams.colorEndpointModes[0] = 8;
3227
3228 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
3229 iseInputs.weight.isGivenInBlockForm = false;
3230
3231 const int numWeights = computeNumWeights(blockParams);
3232 const int weightRangeMax = computeISERangeMax(blockParams.weightISEParams);
3233
3234 for (size_t blockNdx = 0; blockNdx < numBlocks; blockNdx++)
3235 {
3236 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
3237 iseInputs.weight.value.plain[weightNdx] =
3238 (uint32_t)((blockNdx * numWeights + weightNdx) * weightRangeMax / (numBlocks * numWeights - 1));
3239
3240 generateNormalBlock(blockParams, blockWidth, blockHeight, iseInputs)
3241 .assignToMemory(dst + blockNdx * BLOCK_SIZE_BYTES);
3242 }
3243 }
3244
isValidBlock(const uint8_t * data,CompressedTexFormat format,TexDecompressionParams::AstcMode mode)3245 bool isValidBlock(const uint8_t *data, CompressedTexFormat format, TexDecompressionParams::AstcMode mode)
3246 {
3247 const tcu::IVec3 blockPixelSize = getBlockPixelSize(format);
3248 const bool isSRGB = isAstcSRGBFormat(format);
3249 const bool isLDR = isSRGB || mode == TexDecompressionParams::ASTCMODE_LDR;
3250
3251 // sRGB is not supported in HDR mode
3252 DE_ASSERT(!(mode == TexDecompressionParams::ASTCMODE_HDR && isSRGB));
3253
3254 union
3255 {
3256 uint8_t sRGB[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
3257 float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
3258 } tmpBuffer;
3259 const Block128 blockData(data);
3260 const DecompressResult result =
3261 decompressBlock((isSRGB ? (void *)&tmpBuffer.sRGB[0] : (void *)&tmpBuffer.linear[0]), blockData,
3262 blockPixelSize.x(), blockPixelSize.y(), isSRGB, isLDR);
3263
3264 return result == DECOMPRESS_RESULT_VALID_BLOCK;
3265 }
3266
decompress(const PixelBufferAccess & dst,const uint8_t * data,CompressedTexFormat format,TexDecompressionParams::AstcMode mode)3267 void decompress(const PixelBufferAccess &dst, const uint8_t *data, CompressedTexFormat format,
3268 TexDecompressionParams::AstcMode mode)
3269 {
3270 const bool isSRGBFormat = isAstcSRGBFormat(format);
3271
3272 #if defined(DE_DEBUG)
3273 const tcu::IVec3 blockPixelSize = getBlockPixelSize(format);
3274
3275 DE_ASSERT(dst.getWidth() == blockPixelSize.x() && dst.getHeight() == blockPixelSize.y() &&
3276 dst.getDepth() == blockPixelSize.z());
3277 DE_ASSERT(mode == TexDecompressionParams::ASTCMODE_LDR || mode == TexDecompressionParams::ASTCMODE_HDR);
3278 #endif
3279
3280 // sRGB is not supported in HDR mode
3281 DE_ASSERT(!(mode == TexDecompressionParams::ASTCMODE_HDR && isSRGBFormat));
3282
3283 decompress(dst, data, isSRGBFormat, isSRGBFormat || mode == TexDecompressionParams::ASTCMODE_LDR);
3284 }
3285
getBlockTestTypeName(BlockTestType testType)3286 const char *getBlockTestTypeName(BlockTestType testType)
3287 {
3288 switch (testType)
3289 {
3290 case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
3291 return "void_extent_ldr";
3292 case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3293 return "void_extent_hdr";
3294 case BLOCK_TEST_TYPE_WEIGHT_GRID:
3295 return "weight_grid";
3296 case BLOCK_TEST_TYPE_WEIGHT_ISE:
3297 return "weight_ise";
3298 case BLOCK_TEST_TYPE_CEMS:
3299 return "color_endpoint_modes";
3300 case BLOCK_TEST_TYPE_PARTITION_SEED:
3301 return "partition_pattern_index";
3302 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
3303 return "endpoint_value_ldr";
3304 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
3305 return "endpoint_value_hdr_cem_not_15";
3306 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
3307 return "endpoint_value_hdr_cem_15";
3308 case BLOCK_TEST_TYPE_ENDPOINT_ISE:
3309 return "endpoint_ise";
3310 case BLOCK_TEST_TYPE_CCS:
3311 return "color_component_selector";
3312 case BLOCK_TEST_TYPE_RANDOM:
3313 return "random";
3314 default:
3315 DE_ASSERT(false);
3316 return DE_NULL;
3317 }
3318 }
3319
getBlockTestTypeDescription(BlockTestType testType)3320 const char *getBlockTestTypeDescription(BlockTestType testType)
3321 {
3322 switch (testType)
3323 {
3324 case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
3325 return "Test void extent block, LDR mode";
3326 case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3327 return "Test void extent block, HDR mode";
3328 case BLOCK_TEST_TYPE_WEIGHT_GRID:
3329 return "Test combinations of plane count, weight integer sequence encoding parameters, and weight grid size";
3330 case BLOCK_TEST_TYPE_WEIGHT_ISE:
3331 return "Test different integer sequence encoding block values for weight grid";
3332 case BLOCK_TEST_TYPE_CEMS:
3333 return "Test different color endpoint mode combinations, combined with different plane and partition counts";
3334 case BLOCK_TEST_TYPE_PARTITION_SEED:
3335 return "Test different partition pattern indices";
3336 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
3337 return "Test various combinations of each pair of color endpoint values, for each LDR color endpoint mode";
3338 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
3339 return "Test various combinations of each pair of color endpoint values, for each HDR color endpoint mode "
3340 "other than mode 15";
3341 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
3342 return "Test various combinations of each pair of color endpoint values, HDR color endpoint mode 15";
3343 case BLOCK_TEST_TYPE_ENDPOINT_ISE:
3344 return "Test different integer sequence encoding block values for color endpoints";
3345 case BLOCK_TEST_TYPE_CCS:
3346 return "Test color component selector, for different partition counts";
3347 case BLOCK_TEST_TYPE_RANDOM:
3348 return "Random block test";
3349 default:
3350 DE_ASSERT(false);
3351 return DE_NULL;
3352 }
3353 }
3354
isBlockTestTypeHDROnly(BlockTestType testType)3355 bool isBlockTestTypeHDROnly(BlockTestType testType)
3356 {
3357 return testType == BLOCK_TEST_TYPE_VOID_EXTENT_HDR || testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15 ||
3358 testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15;
3359 }
3360
getBlockTestTypeColorScale(BlockTestType testType)3361 Vec4 getBlockTestTypeColorScale(BlockTestType testType)
3362 {
3363 switch (testType)
3364 {
3365 case tcu::astc::BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3366 return Vec4(0.5f / 65504.0f);
3367 case tcu::astc::BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
3368 return Vec4(1.0f / 65504.0f, 1.0f / 65504.0f, 1.0f / 65504.0f, 1.0f);
3369 case tcu::astc::BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
3370 return Vec4(1.0f / 65504.0f);
3371 default:
3372 return Vec4(1.0f);
3373 }
3374 }
3375
getBlockTestTypeColorBias(BlockTestType testType)3376 Vec4 getBlockTestTypeColorBias(BlockTestType testType)
3377 {
3378 switch (testType)
3379 {
3380 case tcu::astc::BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
3381 return Vec4(0.5f);
3382 default:
3383 return Vec4(0.0f);
3384 }
3385 }
3386
3387 } // namespace astc
3388 } // namespace tcu
3389