1 // Compress/HuffmanDecoder.h 2 3 #ifndef ZIP7_INC_COMPRESS_HUFFMAN_DECODER_H 4 #define ZIP7_INC_COMPRESS_HUFFMAN_DECODER_H 5 6 #include "../../../C/CpuArch.h" 7 #include "../../Common/MyTypes.h" 8 9 namespace NCompress { 10 namespace NHuffman { 11 12 // const unsigned kNumTableBits_Default = 9; 13 14 #if 0 || 0 && defined(MY_CPU_64BIT) 15 // for debug or optimization: 16 // 64-BIT limit array can be faster for some compilers. 17 // for debug or optimization: 18 #define Z7_HUFF_USE_64BIT_LIMIT 19 #else 20 // sizet value variable allows to eliminate some move operation in some compilers. 21 // for debug or optimization: 22 // #define Z7_HUFF_USE_SIZET_VALUE 23 #endif 24 25 // v0 must normalized to (32 bits) : (v0 < ((UInt64)1 << 32)) 26 27 #ifdef Z7_HUFF_USE_64BIT_LIMIT 28 typedef UInt64 CLimitInt; 29 typedef UInt64 CValueInt; 30 // all _limits[*] are normalized and limited by ((UInt64)1 << 32). 31 // we don't use (v1) in this branch 32 #define Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) 32 33 #define Z7_HUFF_TABLE_COMPARE(huf, kNumTableBits, v0, v1) \ 34 ((NCompress::NHuffman::CLimitInt)v0 >= (huf)->_limits[0]) 35 #define Z7_HUFF_GET_VAL_FOR_LIMITS(v0, v1, kNumBitsMax, kNumTableBits) (v0) 36 #define Z7_HUFF_GET_VAL_FOR_TABLE( v0, v1, kNumBitsMax, kNumTableBits) ((v0) >> (32 - kNumTableBits)) 37 #define Z7_HUFF_PRECALC_V1(kNumTableBits, v0, v1) 38 #else 39 typedef UInt32 CLimitInt; 40 typedef 41 #ifdef Z7_HUFF_USE_SIZET_VALUE 42 size_t 43 #else 44 UInt32 45 #endif 46 CValueInt; 47 // v1 must be precalculated from v0 in this branch 48 // _limits[0] and (v1) are normalized and limited by (1 << kNumTableBits). 49 // _limits[non_0] are normalized and limited by (1 << kNumBitsMax). 50 #define Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) (kNumBitsMax) 51 #define Z7_HUFF_TABLE_COMPARE(huf, kNumTableBits, v0, v1) \ 52 ((NCompress::NHuffman::CLimitInt)v1 >= (huf)->_limits[0]) 53 #define Z7_HUFF_GET_VAL_FOR_LIMITS(v0, v1, kNumBitsMax, kNumTableBits) ((v0) >> (32 - kNumBitsMax)) 54 #define Z7_HUFF_GET_VAL_FOR_TABLE( v0, v1, kNumBitsMax, kNumTableBits) (v1) 55 #define Z7_HUFF_PRECALC_V1(kNumTableBits, v0, v1) const UInt32 v1 = ((v0) >> (32 - kNumTableBits)); 56 #endif 57 58 59 enum enum_BuildMode 60 { 61 k_BuildMode_Partial = 0, 62 k_BuildMode_Full = 1, 63 k_BuildMode_Full_or_Empty = 2 64 }; 65 66 67 template <class symType, class symType2, class symType4, unsigned kNumBitsMax, unsigned m_NumSymbols, unsigned kNumTableBits /* = kNumTableBits_Default */> 68 struct CDecoderBase 69 { 70 CLimitInt _limits[kNumBitsMax + 2 - kNumTableBits]; 71 UInt32 _poses[kNumBitsMax - kNumTableBits]; // unsigned 72 union 73 { 74 // if defined(MY_CPU_64BIT), we need 64-bit alignment for _symbols. 75 // if !defined(MY_CPU_64BIT), we need 32-bit alignment for _symbols 76 // but we provide alignment for _lens. 77 // _symbols also will be aligned, if _lens are aligned 78 #if defined(MY_CPU_64BIT) 79 UInt64 80 #else 81 UInt32 82 #endif 83 _pad_align[m_NumSymbols < (1u << sizeof(symType) * 8) ? 1 : -1]; 84 /* if symType is Byte, we use 16-bytes padding to avoid cache 85 bank conflict between _lens and _symbols: */ 86 Byte _lens[(1 << kNumTableBits) + (sizeof(symType) == 1 ? 16 : 0)]; 87 } _u; 88 symType _symbols[(1 << kNumTableBits) + m_NumSymbols - (kNumTableBits + 1)]; 89 90 /* 91 Z7_FORCE_INLINE 92 bool IsFull() const 93 { 94 return _limits[kNumBitsMax - kNumTableBits] == 95 (CLimitInt)1u << Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax); 96 } 97 Z7_FORCE_INLINE 98 bool IsEmpty() const 99 { 100 return _limits[kNumBitsMax - kNumTableBits] == 0; 101 } 102 Z7_FORCE_INLINE 103 bool Is_Full_or_Empty() const 104 { 105 return 0 == (_limits[kNumBitsMax - kNumTableBits] & 106 ~((CLimitInt)1 << Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax))); 107 } 108 */ 109 110 Z7_FORCE_INLINE throwCDecoderBase111 bool Build(const Byte *lens, enum_BuildMode buidMode = k_BuildMode_Partial) throw() 112 { 113 unsigned counts[kNumBitsMax + 1]; 114 size_t i; 115 for (i = 0; i <= kNumBitsMax; i++) 116 counts[i] = 0; 117 for (i = 0; i < m_NumSymbols; i++) 118 counts[lens[i]]++; 119 120 UInt32 sum = 0; 121 for (i = 1; i <= kNumTableBits; i++) 122 { 123 sum <<= 1; 124 sum += counts[i]; 125 } 126 127 CLimitInt startPos = (CLimitInt)sum; 128 _limits[0] = 129 #ifdef Z7_HUFF_USE_64BIT_LIMIT 130 startPos << (Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - kNumTableBits); 131 #else 132 startPos; 133 #endif 134 135 for (i = kNumTableBits + 1; i <= kNumBitsMax; i++) 136 { 137 startPos <<= 1; 138 _poses[i - (kNumTableBits + 1)] = (UInt32)(startPos - sum); 139 const unsigned cnt = counts[i]; 140 counts[i] = sum; 141 sum += cnt; 142 startPos += cnt; 143 _limits[i - kNumTableBits] = startPos << (Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - i); 144 } 145 146 _limits[kNumBitsMax + 1 - kNumTableBits] = 147 (CLimitInt)1 << Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax); 148 149 if (buidMode == k_BuildMode_Partial) 150 { 151 if (startPos > (1u << kNumBitsMax)) return false; 152 } 153 else 154 { 155 if (buidMode != k_BuildMode_Full && startPos == 0) return true; 156 if (startPos != (1u << kNumBitsMax)) return false; 157 } 158 size_t sum2 = 0; 159 for (i = 1; i <= kNumTableBits; i++) 160 { 161 const unsigned cnt = counts[i] << (kNumTableBits - i); 162 counts[i] = (unsigned)sum2 >> (kNumTableBits - i); 163 memset(_u._lens + sum2, (int)i, cnt); 164 sum2 += cnt; 165 } 166 167 #ifdef MY_CPU_64BIT 168 symType4 169 // UInt64 // for symType = UInt16 170 // UInt32 // for symType = Byte 171 #else 172 UInt32 173 #endif 174 v = 0; 175 for (i = 0; i < m_NumSymbols; i++, 176 v += 177 1 178 + ( (UInt32)1 << (sizeof(symType) * 8 * 1)) 179 // 0x00010001 // for symType = UInt16 180 // 0x00000101 // for symType = Byte 181 #ifdef MY_CPU_64BIT 182 + ((symType4)1 << (sizeof(symType) * 8 * 2)) 183 + ((symType4)1 << (sizeof(symType) * 8 * 3)) 184 // 0x0001000100010001 // for symType = UInt16 185 // 0x0000000001010101 // for symType = Byte 186 #endif 187 ) 188 { 189 const unsigned len = lens[i]; 190 if (len == 0) 191 continue; 192 const size_t offset = counts[len]; 193 counts[len] = (unsigned)offset + 1; 194 if (len >= kNumTableBits) 195 _symbols[offset] = (symType)v; 196 else 197 { 198 Byte *s2 = (Byte *)(void *)_symbols + (offset << 199 (kNumTableBits + sizeof(symType) / 2 - len)); 200 Byte *lim = s2 + ((size_t)1 << 201 (kNumTableBits + sizeof(symType) / 2 - len)); 202 if (len >= kNumTableBits - 2) 203 { 204 *(symType2 *)(void *)(s2 ) = (symType2)v; 205 *(symType2 *)(void *)(lim - sizeof(symType) * 2) = (symType2)v; 206 } 207 else 208 { 209 #ifdef MY_CPU_64BIT 210 symType4 *s = (symType4 *)(void *)s2; 211 do 212 { 213 s[0] = v; s[1] = v; s += 2; 214 } 215 while (s != (const symType4 *)(const void *)lim); 216 #else 217 symType2 *s = (symType2 *)(void *)s2; 218 do 219 { 220 s[0] = (symType2)v; s[1] = (symType2)v; s += 2; 221 s[0] = (symType2)v; s[1] = (symType2)v; s += 2; 222 } 223 while (s != (const symType2 *)(const void *)lim); 224 #endif 225 } 226 } 227 } 228 return true; 229 } 230 231 232 #define Z7_HUFF_DECODE_ERROR_SYM_CHECK_YES(_numBits_, kNumBitsMax, error_op) if (_numBits_ > kNumBitsMax) { error_op } 233 #define Z7_HUFF_DECODE_ERROR_SYM_CHECK_NO( _numBits_, kNumBitsMax, error_op) 234 235 236 #define Z7_HUFF_DECODE_BASE_TREE_BRANCH(sym, huf, kNumBitsMax, kNumTableBits, \ 237 v0, v1, \ 238 get_val_for_limits, \ 239 check_op, error_op, _numBits_) \ 240 { \ 241 const NHuffman::CValueInt _val = get_val_for_limits(v0, v1, kNumBitsMax, kNumTableBits); \ 242 _numBits_ = kNumTableBits + 1; \ 243 if ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[1]) \ 244 do { _numBits_++; } \ 245 while ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[_numBits_ - kNumTableBits]); \ 246 check_op(_numBits_, kNumBitsMax, error_op) \ 247 sym = (huf)->_symbols[(/* (UInt32) */ (_val >> ((Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - (unsigned)_numBits_)))) \ 248 - (huf)->_poses[_numBits_ - (kNumTableBits + 1)]]; \ 249 } 250 251 /* 252 Z7_HUFF_DECODE_BASE_TREE_BRANCH(sym, huf, kNumBitsMax, kNumTableBits, \ 253 v0, v1, \ 254 get_val_for_limits, \ 255 check_op, error_op, _numBits_) \ 256 257 */ 258 259 #define Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \ 260 v0, v1, \ 261 get_val_for_table, get_val_for_limits, \ 262 check_op, error_op, move_pos_op, after_op, bs) \ 263 { \ 264 if (Z7_HUFF_TABLE_COMPARE(huf, kNumTableBits, v0, v1)) \ 265 { \ 266 const NHuffman::CValueInt _val = get_val_for_limits(v0, v1, kNumBitsMax, kNumTableBits); \ 267 size_t _numBits_ = kNumTableBits + 1; \ 268 if ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[1]) \ 269 do { _numBits_++; } \ 270 while ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[_numBits_ - kNumTableBits]); \ 271 check_op(_numBits_, kNumBitsMax, error_op) \ 272 sym = (huf)->_symbols[(/* (UInt32) */ (_val >> ((Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - (unsigned)_numBits_)))) \ 273 - (huf)->_poses[_numBits_ - (kNumTableBits + 1)]]; \ 274 move_pos_op(bs, _numBits_); \ 275 } \ 276 else \ 277 { \ 278 const size_t _val = get_val_for_table(v0, v1, kNumBitsMax, kNumTableBits); \ 279 const size_t _numBits_ = (huf)->_u._lens[_val]; \ 280 sym = (huf)->_symbols[_val]; \ 281 move_pos_op(bs, _numBits_); \ 282 } \ 283 after_op \ 284 } 285 286 #define Z7_HUFF_DECODE_10(sym, huf, kNumBitsMax, kNumTableBits, \ 287 v0, v1, \ 288 check_op, error_op, move_pos_op, after_op, bs) \ 289 Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \ 290 v0, v1, \ 291 Z7_HUFF_GET_VAL_FOR_TABLE, \ 292 Z7_HUFF_GET_VAL_FOR_LIMITS, \ 293 check_op, error_op, move_pos_op, after_op, bs) \ 294 295 296 #define Z7_HUFF_DECODE_VAL_IN_HIGH32(sym, huf, kNumBitsMax, kNumTableBits, \ 297 v0, \ 298 check_op, error_op, move_pos_op, after_op, bs) \ 299 { \ 300 Z7_HUFF_PRECALC_V1(kNumTableBits, v0, _v1_temp) \ 301 Z7_HUFF_DECODE_10(sym, huf, kNumBitsMax, kNumTableBits, \ 302 v0, _v1_temp, \ 303 check_op, error_op, move_pos_op, after_op, bs) \ 304 } 305 306 #if 0 || defined(Z7_HUFF_USE_64BIT_LIMIT) 307 // this branch uses bitStream->GetValue_InHigh32bits(). 308 #define Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, \ 309 check_op, error_op, move_pos_op) \ 310 { \ 311 const UInt32 v0 = (bitStream)->GetValue_InHigh32bits(); \ 312 Z7_HUFF_PRECALC_V1(kNumTableBits, v0, v1); \ 313 Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \ 314 v0, v1, \ 315 Z7_HUFF_GET_VAL_FOR_TABLE, \ 316 Z7_HUFF_GET_VAL_FOR_LIMITS, \ 317 check_op, error_op, move_pos_op, {}, bitStream) \ 318 } 319 #else 320 /* 321 this branch uses bitStream->GetValue(). 322 So we use SIMPLE versions for v0, v1 calculation: 323 v0 is normalized for kNumBitsMax 324 v1 is normalized for kNumTableBits 325 */ 326 #define Z7_HUFF_GET_VAL_FOR_LIMITS_SIMPLE(v0, v1, kNumBitsMax, kNumTableBits) v0 327 #define Z7_HUFF_GET_VAL_FOR_TABLE_SIMPLE( v0, v1, kNumBitsMax, kNumTableBits) v1 328 #define Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op, move_pos_op) \ 329 { \ 330 const UInt32 v0 = (bitStream)->GetValue(kNumBitsMax); \ 331 const UInt32 v1 = v0 >> (kNumBitsMax - kNumTableBits); \ 332 Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \ 333 v0, v1, \ 334 Z7_HUFF_GET_VAL_FOR_TABLE_SIMPLE, \ 335 Z7_HUFF_GET_VAL_FOR_LIMITS_SIMPLE, \ 336 check_op, error_op, move_pos_op, {}, bitStream) \ 337 } 338 #endif 339 340 #define Z7_HUFF_bitStream_MovePos(bitStream, numBits) (bitStream)->MovePos((unsigned)(numBits)) 341 342 #define Z7_HUFF_DECODE_1(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op) \ 343 Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op, \ 344 Z7_HUFF_bitStream_MovePos) 345 346 // MovePosCheck 347 348 #define Z7_HUFF_DECODE_2(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op) \ 349 Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op, \ 350 Z7_HUFF_bitStream_MovePos) 351 352 // MovePosCheck 353 354 #define Z7_HUFF_DECODE_CHECK(sym, huf, kNumBitsMax, kNumTableBits, bitStream, error_op) \ 355 Z7_HUFF_DECODE_1( sym, huf, kNumBitsMax, kNumTableBits, bitStream, \ 356 Z7_HUFF_DECODE_ERROR_SYM_CHECK_YES, error_op) 357 358 template <class TBitDecoder> 359 Z7_FORCE_INLINE Decode2CDecoderBase360 bool Decode2(TBitDecoder *bitStream, unsigned &sym) const 361 { 362 Z7_HUFF_DECODE_CHECK(sym, this, kNumBitsMax, kNumTableBits, bitStream, 363 { return false; } 364 ) 365 return true; 366 } 367 368 template <class TBitDecoder> 369 Z7_FORCE_INLINE Decode_SymCheck_MovePosCheckCDecoderBase370 bool Decode_SymCheck_MovePosCheck(TBitDecoder *bitStream, unsigned &sym) const 371 { 372 Z7_HUFF_DECODE_0(sym, this, kNumBitsMax, kNumTableBits, bitStream, 373 Z7_HUFF_DECODE_ERROR_SYM_CHECK_YES, 374 { return false; }, 375 { return (bitStream)->MovePosCheck; } 376 ) 377 } 378 379 template <class TBitDecoder> 380 Z7_FORCE_INLINE DecodeCDecoderBase381 unsigned Decode(TBitDecoder *bitStream) const 382 { 383 unsigned sym; 384 Z7_HUFF_DECODE_CHECK(sym, this, kNumBitsMax, kNumTableBits, bitStream, 385 { return (unsigned)(int)(Int32)0xffffffff; } 386 ) 387 return sym; 388 } 389 390 391 template <class TBitDecoder> 392 Z7_FORCE_INLINE DecodeFullCDecoderBase393 unsigned DecodeFull(TBitDecoder *bitStream) const 394 { 395 /* 396 const UInt32 val = bitStream->GetValue(kNumBitsMax); 397 if (val < _limits[kNumTableBits]) 398 { 399 const unsigned pair = _u._lens[(size_t)(val >> (kNumBitsMax - kNumTableBits))]; 400 bitStream->MovePos(pair & kPairLenMask); 401 return pair >> kNumPairLenBits; 402 } 403 404 unsigned numBits; 405 for (numBits = kNumTableBits + 1; val >= _limits[numBits]; numBits++); 406 407 bitStream->MovePos(numBits); 408 return _symbols[_poses[numBits] + (unsigned) 409 ((val - _limits[(size_t)numBits - 1]) >> (kNumBitsMax - numBits))]; 410 */ 411 unsigned sym; 412 Z7_HUFF_DECODE_2(sym, this, kNumBitsMax, kNumTableBits, bitStream, 413 Z7_HUFF_DECODE_ERROR_SYM_CHECK_NO, {} 414 ) 415 return sym; 416 } 417 }; 418 419 420 template <unsigned kNumBitsMax, unsigned m_NumSymbols, unsigned kNumTableBits /* = kNumTableBits_Default */> 421 struct CDecoder: public CDecoderBase 422 <UInt16, UInt32, UInt64, kNumBitsMax, m_NumSymbols, kNumTableBits> {}; 423 424 template <unsigned kNumBitsMax, unsigned m_NumSymbols, unsigned kNumTableBits /* = 7 */> 425 struct CDecoder256: public CDecoderBase 426 <Byte, UInt16, UInt32, kNumBitsMax, m_NumSymbols, kNumTableBits> {}; 427 428 429 template <unsigned numSymbols> 430 class CDecoder7b 431 { 432 public: 433 Byte _lens[1 << 7]; 434 Build(const Byte * lens,bool full)435 bool Build(const Byte *lens, bool full) throw() 436 { 437 const unsigned kNumBitsMax = 7; 438 439 unsigned counts[kNumBitsMax + 1]; 440 unsigned _poses[kNumBitsMax + 1]; 441 unsigned _limits[kNumBitsMax + 1]; 442 unsigned i; 443 for (i = 0; i <= kNumBitsMax; i++) 444 counts[i] = 0; 445 for (i = 0; i < numSymbols; i++) 446 counts[lens[i]]++; 447 448 _limits[0] = 0; 449 const unsigned kMaxValue = 1u << kNumBitsMax; 450 unsigned startPos = 0; 451 unsigned sum = 0; 452 453 for (i = 1; i <= kNumBitsMax; i++) 454 { 455 const unsigned cnt = counts[i]; 456 startPos += cnt << (kNumBitsMax - i); 457 _limits[i] = startPos; 458 counts[i] = sum; 459 _poses[i] = sum; 460 sum += cnt; 461 } 462 463 counts[0] = sum; 464 _poses[0] = sum; 465 466 if (full) 467 { 468 if (startPos != kMaxValue) 469 return false; 470 } 471 else 472 { 473 if (startPos > kMaxValue) 474 return false; 475 } 476 477 478 for (i = 0; i < numSymbols; i++) 479 { 480 const unsigned len = lens[i]; 481 if (len == 0) 482 continue; 483 const unsigned offset = counts[len]++; 484 { 485 Byte *dest = _lens + _limits[(size_t)len - 1] 486 + ((offset - _poses[len]) << (kNumBitsMax - len)); 487 const unsigned num = (unsigned)1 << (kNumBitsMax - len); 488 const unsigned val = (i << 3) + len; 489 for (unsigned k = 0; k < num; k++) 490 dest[k] = (Byte)val; 491 } 492 } 493 494 if (!full) 495 { 496 const unsigned limit = _limits[kNumBitsMax]; 497 const unsigned num = ((unsigned)1 << kNumBitsMax) - limit; 498 Byte *dest = _lens + limit; 499 for (unsigned k = 0; k < num; k++) 500 dest[k] = (Byte) 501 // (0x1f << 3); 502 ((0x1f << 3) + 0x7); 503 } 504 505 return true; 506 } 507 508 #define Z7_HUFF_DECODER_7B_DECODE(dest, huf, get_val, move_pos, bs) \ 509 { \ 510 const unsigned pair = huf->_lens[(size_t)get_val(7)]; \ 511 const unsigned numBits = pair & 0x7; \ 512 move_pos(bs, numBits); \ 513 dest = pair >> 3; \ 514 } 515 516 template <class TBitDecoder> Decode(TBitDecoder * bitStream)517 unsigned Decode(TBitDecoder *bitStream) const 518 { 519 const unsigned pair = _lens[(size_t)bitStream->GetValue(7)]; 520 bitStream->MovePos(pair & 0x7); 521 return pair >> 3; 522 } 523 }; 524 525 }} 526 527 #endif 528