1 // Rar5Decoder.cpp
2 // According to unRAR license, this code may not be used to develop
3 // a program that creates RAR archives
4
5 #include "StdAfx.h"
6
7 #define DICT_SIZE_MAX ((UInt64)1 << DICT_SIZE_BITS_MAX)
8
9 // #include <emmintrin.h> // SSE2
10 // #endif
11
12 #include "../../../C/CpuArch.h"
13 #if 0
14 #include "../../../C/Bra.h"
15 #endif
16
17 #if defined(MY_CPU_ARM64)
18 #include <arm_neon.h>
19 #endif
20
21 // #define Z7_RAR5_SHOW_STAT
22 // #include <stdio.h>
23 #ifdef Z7_RAR5_SHOW_STAT
24 #include <stdio.h>
25 #endif
26
27 #include "../Common/StreamUtils.h"
28
29 #include "Rar5Decoder.h"
30
31 /*
32 Note: original-unrar claims that encoder has limitation for Distance:
33 (Distance <= MaxWinSize - MAX_INC_LZ_MATCH)
34 MAX_INC_LZ_MATCH = 0x1001 + 3;
35 */
36
37 #define LZ_ERROR_TYPE_NO 0
38 #define LZ_ERROR_TYPE_HEADER 1
39 // #define LZ_ERROR_TYPE_SYM 1
40 #define LZ_ERROR_TYPE_DIST 2
41
// Fills (size) bytes starting at (p) with zero bytes.
// Builds with MY_CPU_AMD64 and (Z7_MSC_VER_ORIGINAL <= 1400), excluding ARM64EC,
// use the __stosb intrinsic; every other build calls memset().
static
void My_ZeroMemory(void *p, size_t size)
{
  #if defined(MY_CPU_AMD64) && !defined(_M_ARM64EC) \
      && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL <= 1400)
    __stosb((Byte *)p, 0, size);
  #else
    memset(p, 0, size);
  #endif
}
66
67
68
/*
  Copy primitives used by CopyMatch().

  COPY_CHUNK(dest, src) : copies a fixed group of bytes and advances both pointers.
  COPY_CHUNKS           : repeats COPY_CHUNK while (dest < lim); it expects the
                          local names (dest), (src) and (lim) at the point of use.
  COPY_OFFSET_MIN       : CopyMatch() uses COPY_CHUNK only if (offset >= COPY_OFFSET_MIN).
                          No single load/store inside COPY_CHUNK is wider than that value.
  COPY_CHUNK_SIZE       : 16 for the unaligned-access variants, 8 for the bytewise fallback.
*/

#ifdef MY_CPU_LE_UNALIGN
  #define Z7_RAR5_DEC_USE_UNALIGNED_COPY
#endif

#ifdef Z7_RAR5_DEC_USE_UNALIGNED_COPY

  #define COPY_CHUNK_SIZE 16

  // copies 8 bytes as two unaligned 32-bit words
  #define COPY_CHUNK_4_2(dest, src) \
  { \
    ((UInt32 *)(void *)dest)[0] = ((const UInt32 *)(const void *)src)[0]; \
    ((UInt32 *)(void *)dest)[1] = ((const UInt32 *)(const void *)src)[1]; \
    src += 4 * 2; \
    dest += 4 * 2; \
  }

  /* sse2 doesn't help here in GCC and CLANG.
     so we disabled sse2 here */
  #if 0
    #if defined(MY_CPU_AMD64)
      #define Z7_RAR5_DEC_USE_SSE2
    #elif defined(MY_CPU_X86)
      #if defined(_MSC_VER) && _MSC_VER >= 1300 && defined(_M_IX86_FP) && (_M_IX86_FP >= 2) \
        || defined(__SSE2__) \
        // || 1 == 1  // for debug only
        #define Z7_RAR5_DEC_USE_SSE2
      #endif
    #endif
  #endif

  #if defined(MY_CPU_ARM64)

    #define COPY_OFFSET_MIN 16
    // one 16-byte NEON load/store
    #define COPY_CHUNK1(dest, src) \
    { \
      vst1q_u8((uint8_t *)(void *)dest, \
      vld1q_u8((const uint8_t *)(const void *)src)); \
      src += 16; \
      dest += 16; \
    }

    // 16 bytes, then 16 more unless (lim) was reached.
    // (break) leaves the enclosing COPY_CHUNKS loop.
    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK1(dest, src) \
      if (dest >= lim) break; \
      COPY_CHUNK1(dest, src) \
    }

  #elif defined(Z7_RAR5_DEC_USE_SSE2)
    #include <emmintrin.h> // sse2
    #define COPY_OFFSET_MIN 16

    // one 16-byte unaligned SSE2 load/store
    #define COPY_CHUNK1(dest, src) \
    { \
      _mm_storeu_si128((__m128i *)(void *)dest, \
      _mm_loadu_si128((const __m128i *)(const void *)src)); \
      src += 16; \
      dest += 16; \
    }

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK1(dest, src) \
      if (dest >= lim) break; \
      COPY_CHUNK1(dest, src) \
    }

  #elif defined(MY_CPU_64BIT)
    #define COPY_OFFSET_MIN 8

    // two unaligned 64-bit word copies (16 bytes)
    #define COPY_CHUNK(dest, src) \
    { \
      ((UInt64 *)(void *)dest)[0] = ((const UInt64 *)(const void *)src)[0]; \
      src += 8 * 1; dest += 8 * 1; \
      ((UInt64 *)(void *)dest)[0] = ((const UInt64 *)(const void *)src)[0]; \
      src += 8 * 1; dest += 8 * 1; \
    }

  #else
    #define COPY_OFFSET_MIN 4

    // four unaligned 32-bit word copies (16 bytes)
    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK_4_2(dest, src); \
      COPY_CHUNK_4_2(dest, src); \
    }

  #endif
#endif


#ifndef COPY_CHUNK_SIZE
  // fallback without unaligned word access: bytes are copied in pairs,
  // both bytes of a pair are loaded before either one is stored
  #define COPY_OFFSET_MIN 4
  #define COPY_CHUNK_SIZE 8
  #define COPY_CHUNK_2(dest, src) \
  { \
    const Byte a0 = src[0]; \
    const Byte a1 = src[1]; \
    dest[0] = a0; \
    dest[1] = a1; \
    src += 2; \
    dest += 2; \
  }
  #define COPY_CHUNK(dest, src) \
  { \
    COPY_CHUNK_2(dest, src) \
    COPY_CHUNK_2(dest, src) \
    COPY_CHUNK_2(dest, src) \
    COPY_CHUNK_2(dest, src) \
  }
#endif


// at least one COPY_CHUNK is always executed (do-while)
#define COPY_CHUNKS \
{ \
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
  do { COPY_CHUNK(dest, src) } \
  while (dest < lim); \
}
188
189 namespace NCompress {
190 namespace NRar5 {
191
// Integer type named CLenType.
// Flip the (#if 1) switch to build with size_t instead of unsigned.
typedef
  #if 1
    unsigned
  #else
    size_t
  #endif
  CLenType;
199
200 // (len != 0)
201 static
202 Z7_FORCE_INLINE
203 // Z7_ATTRIB_NO_VECTOR
CopyMatch(size_t offset,Byte * dest,const Byte * src,const Byte * lim)204 void CopyMatch(size_t offset, Byte *dest, const Byte *src, const Byte *lim)
205 {
206 {
207 // (COPY_OFFSET_MIN >= 4)
208 if (offset >= COPY_OFFSET_MIN)
209 {
210 COPY_CHUNKS
211 // return;
212 }
213 else
214 #if (COPY_OFFSET_MIN > 4)
215 #if COPY_CHUNK_SIZE < 8
216 #error Stop_Compiling_Bad_COPY_CHUNK_SIZE
217 #endif
218 if (offset >= 4)
219 {
220 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
221 do
222 {
223 COPY_CHUNK_4_2(dest, src)
224 #if COPY_CHUNK_SIZE < 16
225 if (dest >= lim) break;
226 #endif
227 COPY_CHUNK_4_2(dest, src)
228 }
229 while (dest < lim);
230 // return;
231 }
232 else
233 #endif
234 {
235 // (offset < 4)
236 const unsigned b0 = src[0];
237 if (offset < 2)
238 {
239 #if defined(Z7_RAR5_DEC_USE_UNALIGNED_COPY) && (COPY_CHUNK_SIZE == 16)
240 #if defined(MY_CPU_64BIT)
241 {
242 const UInt64 v64 = (UInt64)b0 * 0x0101010101010101;
243 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
244 do
245 {
246 ((UInt64 *)(void *)dest)[0] = v64;
247 ((UInt64 *)(void *)dest)[1] = v64;
248 dest += 16;
249 }
250 while (dest < lim);
251 }
252 #else
253 {
254 UInt32 v = b0;
255 v |= v << 8;
256 v |= v << 16;
257 do
258 {
259 ((UInt32 *)(void *)dest)[0] = v;
260 ((UInt32 *)(void *)dest)[1] = v;
261 dest += 8;
262 ((UInt32 *)(void *)dest)[0] = v;
263 ((UInt32 *)(void *)dest)[1] = v;
264 dest += 8;
265 }
266 while (dest < lim);
267 }
268 #endif
269 #else
270 do
271 {
272 dest[0] = (Byte)b0;
273 dest[1] = (Byte)b0;
274 dest += 2;
275 dest[0] = (Byte)b0;
276 dest[1] = (Byte)b0;
277 dest += 2;
278 }
279 while (dest < lim);
280 #endif
281 }
282 else if (offset == 2)
283 {
284 const Byte b1 = src[1];
285 {
286 do
287 {
288 dest[0] = (Byte)b0;
289 dest[1] = b1;
290 dest += 2;
291 }
292 while (dest < lim);
293 }
294 }
295 else // (offset == 3)
296 {
297 const Byte b1 = src[1];
298 const Byte b2 = src[2];
299 do
300 {
301 dest[0] = (Byte)b0;
302 dest[1] = b1;
303 dest[2] = b2;
304 dest += 3;
305 }
306 while (dest < lim);
307 }
308 }
309 }
310 }
311
312 static const size_t kInputBufSize = 1 << 20;
313 static const UInt32 k_Filter_BlockSize_MAX = 1 << 22;
314 static const unsigned k_Filter_AfterPad_Size = 64;
315
316 #ifdef Z7_RAR5_SHOW_STAT
317 static const unsigned kNumStats1 = 10;
318 static const unsigned kNumStats2 = (1 << 12) + 16;
319 static UInt32 g_stats1[kNumStats1];
320 static UInt32 g_stats2[kNumStats1][kNumStats2];
321 #endif
322
323 #if 1
324 MY_ALIGN(32)
325 // DICT_SIZE_BITS_MAX-1 are required
326 static const Byte k_LenPlusTable[DICT_SIZE_BITS_MAX] =
327 { 0,0,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 };
328 #endif
329
330
331
332 class CBitDecoder
333 {
334 public:
335 const Byte *_buf;
336 const Byte *_bufCheck_Block; // min(ptr for _blockEnd, _bufCheck)
337 unsigned _bitPos; // = [0 ... 7]
338 bool _wasFinished;
339 bool _minorError;
340 unsigned _blockEndBits7; // = [0 ... 7] : the number of additional bits in (_blockEnd) poisition.
341 HRESULT _hres;
342 const Byte *_bufCheck; // relaxed limit (16 bytes before real end of input data in buffer)
343 Byte *_bufLim; // end if input data
344 Byte *_bufBase;
345 ISequentialInStream *_stream;
346
347 UInt64 _processedSize;
348 UInt64 _blockEnd; // absolute end of current block
349 // but it doesn't include additional _blockEndBits7 [0 ... 7] bits
350
351 Z7_FORCE_INLINE
CopyFrom(const CBitDecoder & a)352 void CopyFrom(const CBitDecoder &a)
353 {
354 _buf = a._buf;
355 _bufCheck_Block = a._bufCheck_Block;
356 _bitPos = a._bitPos;
357 _wasFinished = a._wasFinished;
358 _blockEndBits7 = a._blockEndBits7;
359 _bufCheck = a._bufCheck;
360 _bufLim = a._bufLim;
361 _bufBase = a._bufBase;
362
363 _processedSize = a._processedSize;
364 _blockEnd = a._blockEnd;
365 }
366
367 Z7_FORCE_INLINE
RestoreFrom2(const CBitDecoder & a)368 void RestoreFrom2(const CBitDecoder &a)
369 {
370 _buf = a._buf;
371 _bitPos = a._bitPos;
372 }
373
374 Z7_FORCE_INLINE
SetCheck_forBlock()375 void SetCheck_forBlock()
376 {
377 _bufCheck_Block = _bufCheck;
378 if (_bufCheck > _buf)
379 {
380 const UInt64 processed = GetProcessedSize_Round();
381 if (_blockEnd < processed)
382 _bufCheck_Block = _buf;
383 else
384 {
385 const UInt64 delta = _blockEnd - processed;
386 if ((size_t)(_bufCheck - _buf) > delta)
387 _bufCheck_Block = _buf + (size_t)delta;
388 }
389 }
390 }
391
392 Z7_FORCE_INLINE
IsBlockOverRead() const393 bool IsBlockOverRead() const
394 {
395 const UInt64 v = GetProcessedSize_Round();
396 if (v < _blockEnd) return false;
397 if (v > _blockEnd) return true;
398 return _bitPos > _blockEndBits7;
399 }
400
401 /*
402 CBitDecoder() throw():
403 _buf(0),
404 _bufLim(0),
405 _bufBase(0),
406 _stream(0),
407 _processedSize(0),
408 _wasFinished(false)
409 {}
410 */
411
412 Z7_FORCE_INLINE
Init()413 void Init() throw()
414 {
415 _blockEnd = 0;
416 _blockEndBits7 = 0;
417
418 _bitPos = 0;
419 _processedSize = 0;
420 _buf = _bufBase;
421 _bufLim = _bufBase;
422 _bufCheck = _buf;
423 _bufCheck_Block = _buf;
424 _wasFinished = false;
425 _minorError = false;
426 }
427
428 void Prepare2() throw();
429
430 Z7_FORCE_INLINE
Prepare()431 void Prepare() throw()
432 {
433 if (_buf >= _bufCheck)
434 Prepare2();
435 }
436
437 Z7_FORCE_INLINE
ExtraBitsWereRead() const438 bool ExtraBitsWereRead() const
439 {
440 return _buf >= _bufLim && (_buf > _bufLim || _bitPos != 0);
441 }
442
InputEofError() const443 Z7_FORCE_INLINE bool InputEofError() const { return ExtraBitsWereRead(); }
444
GetProcessedBits7() const445 Z7_FORCE_INLINE unsigned GetProcessedBits7() const { return _bitPos; }
GetProcessedSize_Round() const446 Z7_FORCE_INLINE UInt64 GetProcessedSize_Round() const { return _processedSize + (size_t)(_buf - _bufBase); }
GetProcessedSize() const447 Z7_FORCE_INLINE UInt64 GetProcessedSize() const { return _processedSize + (size_t)(_buf - _bufBase) + ((_bitPos + 7) >> 3); }
448
449 Z7_FORCE_INLINE
AlignToByte()450 void AlignToByte()
451 {
452 if (_bitPos != 0)
453 {
454 #if 1
455 // optional check of unused bits for strict checking:
456 // original-unrar doesn't check it:
457 const unsigned b = (unsigned)*_buf << _bitPos;
458 if (b & 0xff)
459 _minorError = true;
460 #endif
461 _buf++;
462 _bitPos = 0;
463 }
464 // _buf += (_bitPos + 7) >> 3;
465 // _bitPos = 0;
466 }
467
468 Z7_FORCE_INLINE
ReadByte_InAligned()469 Byte ReadByte_InAligned()
470 {
471 return *_buf++;
472 }
473
474 Z7_FORCE_INLINE
GetValue(unsigned numBits) const475 UInt32 GetValue(unsigned numBits) const
476 {
477 // 0 < numBits <= 17 : supported values
478 #if defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE_UNALIGN)
479 UInt32 v = GetBe32(_buf);
480 #if 1
481 return (v >> (32 - numBits - _bitPos)) & ((1u << numBits) - 1);
482 #else
483 return (v << _bitPos) >> (32 - numBits);
484 #endif
485 #else
486 UInt32 v = ((UInt32)_buf[0] << 16) | ((UInt32)_buf[1] << 8) | (UInt32)_buf[2];
487 v >>= 24 - numBits - _bitPos;
488 return v & ((1 << numBits) - 1);
489 #endif
490 }
491
492 Z7_FORCE_INLINE
GetValue_InHigh32bits() const493 UInt32 GetValue_InHigh32bits() const
494 {
495 // 0 < numBits <= 17 : supported vales
496 #if defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE_UNALIGN)
497 return GetBe32(_buf) << _bitPos;
498 #else
499 const UInt32 v = ((UInt32)_buf[0] << 16) | ((UInt32)_buf[1] << 8) | (UInt32)_buf[2];
500 return v << (_bitPos + 8);
501 #endif
502 }
503
504
505 Z7_FORCE_INLINE
MovePos(unsigned numBits)506 void MovePos(unsigned numBits)
507 {
508 numBits += _bitPos;
509 _buf += numBits >> 3;
510 _bitPos = numBits & 7;
511 }
512
513
514 Z7_FORCE_INLINE
ReadBits9(unsigned numBits)515 UInt32 ReadBits9(unsigned numBits)
516 {
517 const Byte *buf = _buf;
518 UInt32 v = ((UInt32)buf[0] << 8) | (UInt32)buf[1];
519 v &= (UInt32)0xFFFF >> _bitPos;
520 numBits += _bitPos;
521 v >>= 16 - numBits;
522 _buf = buf + (numBits >> 3);
523 _bitPos = numBits & 7;
524 return v;
525 }
526
527 Z7_FORCE_INLINE
ReadBits_9fix(unsigned numBits)528 UInt32 ReadBits_9fix(unsigned numBits)
529 {
530 const Byte *buf = _buf;
531 UInt32 v = ((UInt32)buf[0] << 8) | (UInt32)buf[1];
532 const UInt32 mask = (1u << numBits) - 1;
533 numBits += _bitPos;
534 v >>= 16 - numBits;
535 _buf = buf + (numBits >> 3);
536 _bitPos = numBits & 7;
537 return v & mask;
538 }
539
540 #if 1 && defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 8)
541 #define Z7_RAR5_USE_64BIT
542 #endif
543
544 #ifdef Z7_RAR5_USE_64BIT
545 #define MAX_DICT_LOG (sizeof(size_t) / 8 * 5 + 31)
546 #else
547 #define MAX_DICT_LOG 31
548 #endif
549
550 #ifdef Z7_RAR5_USE_64BIT
551
552 Z7_FORCE_INLINE
ReadBits_Big(unsigned numBits,UInt64 v)553 size_t ReadBits_Big(unsigned numBits, UInt64 v)
554 {
555 const UInt64 mask = ((UInt64)1 << numBits) - 1;
556 numBits += _bitPos;
557 const Byte *buf = _buf;
558 // UInt64 v = GetBe64(buf);
559 v >>= 64 - numBits;
560 _buf = buf + (numBits >> 3);
561 _bitPos = numBits & 7;
562 return (size_t)(v & mask);
563 }
564 #define ReadBits_Big25 ReadBits_Big
565
566 #else
567
568 // (numBits <= 25) for 32-bit mode
569 Z7_FORCE_INLINE
ReadBits_Big25(unsigned numBits,UInt32 v)570 size_t ReadBits_Big25(unsigned numBits, UInt32 v)
571 {
572 const UInt32 mask = ((UInt32)1 << numBits) - 1;
573 numBits += _bitPos;
574 v >>= 32 - numBits;
575 _buf += numBits >> 3;
576 _bitPos = numBits & 7;
577 return v & mask;
578 }
579
580 // numBits != 0
581 Z7_FORCE_INLINE
ReadBits_Big(unsigned numBits,UInt32 v)582 size_t ReadBits_Big(unsigned numBits, UInt32 v)
583 {
584 const Byte *buf = _buf;
585 // UInt32 v = GetBe32(buf);
586 #if 0
587 const UInt32 mask = ((UInt32)1 << numBits) - 1;
588 numBits += _bitPos;
589 if (numBits > 32)
590 {
591 v <<= numBits - 32;
592 v |= (UInt32)buf[4] >> (40 - numBits);
593 }
594 else
595 v >>= 32 - numBits;
596 _buf = buf + (numBits >> 3);
597 _bitPos = numBits & 7;
598 return v & mask;
599 #else
600 v <<= _bitPos;
601 v |= (UInt32)buf[4] >> (8 - _bitPos);
602 v >>= 32 - numBits;
603 numBits += _bitPos;
604 _buf = buf + (numBits >> 3);
605 _bitPos = numBits & 7;
606 return v;
607 #endif
608 }
609 #endif
610 };
611
612
// Prepare2() writes this number of 0xFF bytes after the end of input data
// and sets the relaxed limit (_bufCheck) that many bytes before the end.
static const unsigned kLookaheadSize = 16;
static const unsigned kInputBufferPadZone = kLookaheadSize;
615
616 Z7_NO_INLINE
Prepare2()617 void CBitDecoder::Prepare2() throw()
618 {
619 if (_buf > _bufLim)
620 return;
621
622 size_t rem = (size_t)(_bufLim - _buf);
623 if (rem != 0)
624 memmove(_bufBase, _buf, rem);
625
626 _bufLim = _bufBase + rem;
627 _processedSize += (size_t)(_buf - _bufBase);
628 _buf = _bufBase;
629
630 // we do not look ahead more than 16 bytes before limit checks.
631
632 if (!_wasFinished)
633 {
634 while (rem <= kLookaheadSize)
635 {
636 UInt32 processed = (UInt32)(kInputBufSize - rem);
637 // processed = 33; // for debug
638 _hres = _stream->Read(_bufLim, processed, &processed);
639 _bufLim += processed;
640 rem += processed;
641 if (processed == 0 || _hres != S_OK)
642 {
643 _wasFinished = true;
644 // if (_hres != S_OK) throw CInBufferException(result);
645 break;
646 }
647 }
648 }
649
650 // we always fill pad zone here.
651 // so we don't need to call Prepare2() if (_wasFinished == true)
652 memset(_bufLim, 0xFF, kLookaheadSize);
653
654 if (rem < kLookaheadSize)
655 {
656 _bufCheck = _buf;
657 // memset(_bufLim, 0xFF, kLookaheadSize - rem);
658 }
659 else
660 _bufCheck = _bufLim - kLookaheadSize;
661
662 SetCheck_forBlock();
663 }
664
665
// Filter type codes; CDecoder::ExecuteFilter() compares CFilter::Type with these values.
enum FilterType
{
  FILTER_DELTA = 0,
  FILTER_E8,
  FILTER_E8E9,
  FILTER_ARM
};
673
static const size_t kWriteStep = (size_t)1 << 18;
  // (size_t)1 << 22; // original-unrar

// Original unRAR claims that maximum possible filter block size is (1 << 16) now,
// and (1 << 17) is minimum win size required to support filter.
// Original unRAR uses (1u << 18) for "extra safety and possible filter area size expansion"
// We can use any win size, but we use same (1u << 18) for compatibility
// with WinRar

// static const unsigned kWinSize_Log_Min = 17;
static const size_t kWinSize_Min = 1u << 18;
685
CDecoder()686 CDecoder::CDecoder():
687 _isSolid(false),
688 _is_v7(false),
689 _wasInit(false),
690 // _dictSizeLog(0),
691 _dictSize(kWinSize_Min),
692 _window(NULL),
693 _winPos(0),
694 _winSize(0),
695 _dictSize_forCheck(0),
696 _lzSize(0),
697 _lzEnd(0),
698 _writtenFileSize(0),
699 _filters(NULL),
700 _winSize_Allocated(0),
701 _inputBuf(NULL)
702 {
703 #if 1
704 memcpy(m_LenPlusTable, k_LenPlusTable, sizeof(k_LenPlusTable));
705 #endif
706 // printf("\nsizeof(CDecoder) == %d\n", sizeof(CDecoder));
707 }
708
~CDecoder()709 CDecoder::~CDecoder()
710 {
711 #ifdef Z7_RAR5_SHOW_STAT
712 printf("\n%4d :", 0);
713 for (unsigned k = 0; k < kNumStats1; k++)
714 printf(" %8u", (unsigned)g_stats1[k]);
715 printf("\n");
716 for (unsigned i = 0; i < kNumStats2; i++)
717 {
718 printf("\n%4d :", i);
719 for (unsigned k = 0; k < kNumStats1; k++)
720 printf(" %8u", (unsigned)g_stats2[k][i]);
721 }
722 printf("\n");
723 #endif
724
725 #define Z7_RAR_FREE_WINDOW ::BigFree(_window);
726
727 Z7_RAR_FREE_WINDOW
728 z7_AlignedFree(_inputBuf);
729 z7_AlignedFree(_filters);
730 }
731
732 Z7_NO_INLINE
DeleteUnusedFilters()733 void CDecoder::DeleteUnusedFilters()
734 {
735 if (_numUnusedFilters != 0)
736 {
737 // printf("\nDeleteUnusedFilters _numFilters = %6u\n", _numFilters);
738 const unsigned n = _numFilters - _numUnusedFilters;
739 _numFilters = n;
740 memmove(_filters, _filters + _numUnusedFilters, n * sizeof(CFilter));
741 _numUnusedFilters = 0;
742 }
743 }
744
745
746 Z7_NO_INLINE
WriteData(const Byte * data,size_t size)747 HRESULT CDecoder::WriteData(const Byte *data, size_t size)
748 {
749 HRESULT res = S_OK;
750 if (!_unpackSize_Defined || _writtenFileSize < _unpackSize)
751 {
752 size_t cur = size;
753 if (_unpackSize_Defined)
754 {
755 const UInt64 rem = _unpackSize - _writtenFileSize;
756 if (cur > rem)
757 cur = (size_t)rem;
758 }
759 res = WriteStream(_outStream, data, cur);
760 if (res != S_OK)
761 _writeError = true;
762 }
763 _writtenFileSize += size;
764 return res;
765 }
766
767
/*
  Helper macros for CDecoder::ExecuteFilter().
  They use the local names (pc), (data), (data_lim), (dataSize) and (kFileSize)
  of that function.

  BR_PC_INIT(lim_back) : adjusts (pc) once before the filter loop.
  BR_PC_GET            : value derived from (pc) and the current (data) pointer.
                         With BR_CONV_USE_OPT_PC_PTR the low 32 bits of the (data)
                         address are subtracted in BR_PC_INIT and added back here.
*/
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT(lim_back)  pc -= (UInt32)(SizeT)data;
#define BR_PC_GET             (pc + (UInt32)(SizeT)data)
#else
#define BR_PC_INIT(lim_back)  pc += (UInt32)dataSize - (lim_back);
#define BR_PC_GET             (pc - (UInt32)(SizeT)(data_lim - data))
#endif

#ifdef MY_CPU_LE_UNALIGN
#define Z7_RAR5_FILTER_USE_LE_UNALIGN
#endif

/*
  RAR_E8_FILT scans for a matching opcode byte and converts the 32-bit value after it.
  The scan itself has no bound check: ExecuteFilter() stores the byte 0xe8 at
  data[dataSize] before the macro is used, so the scan stops there at the latest.
  The loop is finished, when a match is found after (data_lim).

  Unaligned variant : (mask) is 0xff to match 0xe8 only, or 0xfe to match 0xe8 and 0xe9.
                      4 bytes are tested per step.
  Bytewise variant  : (get_byte) is an expression that reads the next byte and advances (data).
*/
#ifdef Z7_RAR5_FILTER_USE_LE_UNALIGN
#define RAR_E8_FILT(mask) \
{ \
  for (;;) \
  { UInt32 v; \
    do { \
      v = GetUi32(data) ^ (UInt32)0xe8e8e8e8; \
      data += 4; \
      if ((v & ((UInt32)(mask) << (8 * 0))) == 0) { data -= 3; break; } \
      if ((v & ((UInt32)(mask) << (8 * 1))) == 0) { data -= 2; break; } \
      if ((v & ((UInt32)(mask) << (8 * 2))) == 0) { data -= 1; break; } } \
    while((v & ((UInt32)(mask) << (8 * 3)))); \
    if (data > data_lim) break; \
    const UInt32 offset = BR_PC_GET & (kFileSize - 1); \
    const UInt32 addr = GetUi32(data); \
    data += 4; \
    if (addr < kFileSize) \
      SetUi32(data - 4, addr - offset) \
    else if (addr > ~offset) /* if (addr > ((UInt32)0xFFFFFFFF - offset)) */ \
      SetUi32(data - 4, addr + kFileSize) \
  } \
}
#else
#define RAR_E8_FILT(get_byte) \
{ \
  for (;;) \
  { \
    if ((get_byte) != 0xe8) \
    if ((get_byte) != 0xe8) \
    if ((get_byte) != 0xe8) \
    if ((get_byte) != 0xe8) \
      continue; \
    { if (data > data_lim) break; \
      const UInt32 offset = BR_PC_GET & (kFileSize - 1); \
      const UInt32 addr = GetUi32(data); \
      data += 4; \
      if (addr < kFileSize) \
        SetUi32(data - 4, addr - offset) \
      else if (addr > ~offset) /* if (addr > ((UInt32)0xFFFFFFFF - offset)) */ \
        SetUi32(data - 4, addr + kFileSize) \
    } \
  } \
}
#endif
830
ExecuteFilter(const CFilter & f)831 HRESULT CDecoder::ExecuteFilter(const CFilter &f)
832 {
833 Byte *data = _filterSrc;
834 UInt32 dataSize = f.Size;
835 // printf("\nType = %d offset = %9d size = %5d", f.Type, (unsigned)(f.Start - _lzFileStart), dataSize);
836
837 if (f.Type == FILTER_DELTA)
838 {
839 // static unsigned g1 = 0, g2 = 0; g1 += dataSize;
840 // if (g2++ % 100 == 0) printf("DELTA num %8u, size %8u MiB, channels = %2u curSize=%8u\n", g2, (g1 >> 20), f.Channels, dataSize);
841 _filterDst.AllocAtLeast_max((size_t)dataSize, k_Filter_BlockSize_MAX);
842 if (!_filterDst.IsAllocated())
843 return E_OUTOFMEMORY;
844
845 Byte *dest = _filterDst;
846 const unsigned numChannels = f.Channels;
847 unsigned curChannel = 0;
848 do
849 {
850 Byte prevByte = 0;
851 Byte *dest2 = dest + curChannel;
852 const Byte *dest_lim = dest + dataSize;
853 for (; dest2 < dest_lim; dest2 += numChannels)
854 *dest2 = (prevByte = (Byte)(prevByte - *data++));
855 }
856 while (++curChannel != numChannels);
857 // return WriteData(dest, dataSize);
858 data = dest;
859 }
860 else if (f.Type < FILTER_ARM)
861 {
862 // FILTER_E8 or FILTER_E8E9
863 if (dataSize > 4)
864 {
865 UInt32 pc = (UInt32)(f.Start - _lzFileStart);
866 const UInt32 kFileSize = (UInt32)1 << 24;
867 const Byte *data_lim = data + dataSize - 4;
868 BR_PC_INIT(4) // because (data_lim) was moved back for 4 bytes
869 data[dataSize] = 0xe8;
870 if (f.Type == FILTER_E8)
871 {
872 // static unsigned g1 = 0; g1 += dataSize; printf("\n FILTER_E8 %u", (g1 >> 20));
873 #ifdef Z7_RAR5_FILTER_USE_LE_UNALIGN
874 RAR_E8_FILT (0xff)
875 #else
876 RAR_E8_FILT (*data++)
877 #endif
878 }
879 else
880 {
881 // static unsigned g1 = 0; g1 += dataSize; printf("\n FILTER_E8_E9 %u", (g1 >> 20));
882 #ifdef Z7_RAR5_FILTER_USE_LE_UNALIGN
883 RAR_E8_FILT (0xfe)
884 #else
885 RAR_E8_FILT (*data++ & 0xfe)
886 #endif
887 }
888 }
889 data = _filterSrc;
890 }
891 else if (f.Type == FILTER_ARM)
892 {
893 UInt32 pc = (UInt32)(f.Start - _lzFileStart);
894 #if 0
895 // z7_BranchConv_ARM_Dec expects that (fileOffset & 3) == 0;
896 // but even if (fileOffset & 3) then current code
897 // in z7_BranchConv_ARM_Dec works same way as unrar's code still.
898 z7_BranchConv_ARM_Dec(data, dataSize, pc - 8);
899 #else
900 dataSize &= ~(UInt32)3;
901 if (dataSize)
902 {
903 Byte *data_lim = data + dataSize;
904 data_lim[3] = 0xeb;
905 BR_PC_INIT(0)
906 pc -= 4; // because (data) will point to next instruction
907 for (;;) // do
908 {
909 data += 4;
910 if (data[-1] != 0xeb)
911 continue;
912 if (data > data_lim)
913 break;
914 {
915 UInt32 v = GetUi32a(data - 4) - (BR_PC_GET >> 2);
916 v &= 0x00ffffff;
917 v |= 0xeb000000;
918 SetUi32a(data - 4, v)
919 }
920 }
921 }
922 #endif
923 data = _filterSrc;
924 }
925 else
926 {
927 _unsupportedFilter = true;
928 My_ZeroMemory(data, dataSize);
929 // return S_OK; // unrar
930 }
931 // return WriteData(_filterSrc, (size_t)f.Size);
932 return WriteData(data, (size_t)f.Size);
933 }
934
935
WriteBuf()936 HRESULT CDecoder::WriteBuf()
937 {
938 DeleteUnusedFilters();
939
940 const UInt64 lzSize = _lzSize + _winPos;
941
942 for (unsigned i = 0; i < _numFilters;)
943 {
944 const CFilter &f = _filters[i];
945 const UInt64 blockStart = f.Start;
946 const size_t lzAvail = (size_t)(lzSize - _lzWritten);
947 if (lzAvail == 0)
948 break;
949
950 if (blockStart > _lzWritten)
951 {
952 const UInt64 rem = blockStart - _lzWritten;
953 size_t size = lzAvail;
954 if (size > rem)
955 size = (size_t)rem;
956 if (size != 0) // is it true always ?
957 {
958 RINOK(WriteData(_window + _winPos - lzAvail, size))
959 _lzWritten += size;
960 }
961 continue;
962 }
963
964 const UInt32 blockSize = f.Size;
965 size_t offset = (size_t)(_lzWritten - blockStart);
966 if (offset == 0)
967 {
968 _filterSrc.AllocAtLeast_max(
969 (size_t)blockSize + k_Filter_AfterPad_Size,
970 k_Filter_BlockSize_MAX + k_Filter_AfterPad_Size);
971 if (!_filterSrc.IsAllocated())
972 return E_OUTOFMEMORY;
973 }
974
975 const size_t blockRem = (size_t)blockSize - offset;
976 size_t size = lzAvail;
977 if (size > blockRem)
978 size = blockRem;
979 memcpy(_filterSrc + offset, _window + _winPos - lzAvail, size);
980 _lzWritten += size;
981 offset += size;
982 if (offset != blockSize)
983 return S_OK;
984
985 _numUnusedFilters = ++i;
986 RINOK(ExecuteFilter(f))
987 }
988
989 DeleteUnusedFilters();
990
991 if (_numFilters)
992 return S_OK;
993
994 const size_t lzAvail = (size_t)(lzSize - _lzWritten);
995 RINOK(WriteData(_window + _winPos - lzAvail, lzAvail))
996 _lzWritten += lzAvail;
997 return S_OK;
998 }
999
1000
1001 Z7_NO_INLINE
ReadUInt32(CBitDecoder & bi)1002 static UInt32 ReadUInt32(CBitDecoder &bi)
1003 {
1004 const unsigned numBits = (unsigned)bi.ReadBits_9fix(2) * 8 + 8;
1005 UInt32 v = 0;
1006 unsigned i = 0;
1007 do
1008 {
1009 v += (UInt32)bi.ReadBits_9fix(8) << i;
1010 i += 8;
1011 }
1012 while (i != numBits);
1013 return v;
1014 }
1015
1016
// capacity of the CDecoder::_filters array that AddFilter() allocates
static const unsigned MAX_UNPACK_FILTERS = 8192;
1018
AddFilter(CBitDecoder & _bitStream)1019 HRESULT CDecoder::AddFilter(CBitDecoder &_bitStream)
1020 {
1021 DeleteUnusedFilters();
1022
1023 if (_numFilters >= MAX_UNPACK_FILTERS)
1024 {
1025 RINOK(WriteBuf())
1026 DeleteUnusedFilters();
1027 if (_numFilters >= MAX_UNPACK_FILTERS)
1028 {
1029 _unsupportedFilter = true;
1030 InitFilters();
1031 }
1032 }
1033
1034 _bitStream.Prepare();
1035
1036 CFilter f;
1037 const UInt32 blockStart = ReadUInt32(_bitStream);
1038 f.Size = ReadUInt32(_bitStream);
1039
1040 if (f.Size > k_Filter_BlockSize_MAX)
1041 {
1042 _unsupportedFilter = true;
1043 f.Size = 0; // unrar 5.5.5
1044 }
1045
1046 f.Type = (Byte)_bitStream.ReadBits_9fix(3);
1047 f.Channels = 0;
1048 if (f.Type == FILTER_DELTA)
1049 f.Channels = (Byte)(_bitStream.ReadBits_9fix(5) + 1);
1050 f.Start = _lzSize + _winPos + blockStart;
1051
1052 #if 0
1053 static unsigned z_cnt = 0; if (z_cnt++ % 100 == 0)
1054 printf ("\nFilter %7u : %4u : %8p, st=%8x, size=%8x, type=%u ch=%2u",
1055 z_cnt, (unsigned)_filters.Size(), (void *)(size_t)(_lzSize + _winPos),
1056 (unsigned)blockStart, (unsigned)f.Size, (unsigned)f.Type, (unsigned)f.Channels);
1057 #endif
1058
1059 if (f.Start < _filterEnd)
1060 _unsupportedFilter = true;
1061 else
1062 {
1063 _filterEnd = f.Start + f.Size;
1064 if (f.Size != 0)
1065 {
1066 if (!_filters)
1067 {
1068 _filters = (CFilter *)z7_AlignedAlloc(MAX_UNPACK_FILTERS * sizeof(CFilter));
1069 if (!_filters)
1070 return E_OUTOFMEMORY;
1071 }
1072 // printf("\n_numFilters = %6u\n", _numFilters);
1073 const unsigned i = _numFilters++;
1074 _filters[i] = f;
1075 }
1076 }
1077
1078 return S_OK;
1079 }
1080
1081
// RIF(x) : returns S_FALSE from the enclosing function, if (x) is false.
#define RIF(x) { if (!(x)) return S_FALSE; }

// PRINT_CNT(name, skip) : debug counter output; it expands to nothing in the enabled (#if 1) branch.
#if 1
#define PRINT_CNT(name, skip)
#else
#define PRINT_CNT(name, skip) \
  { static unsigned g_cnt = 0; if (g_cnt++ % skip == 0) printf("\n%16s:  %8u", name, g_cnt); }
#endif
1090
ReadTables(CBitDecoder & _bitStream)1091 HRESULT CDecoder::ReadTables(CBitDecoder &_bitStream)
1092 {
1093 if (_progress)
1094 {
1095 const UInt64 packSize = _bitStream.GetProcessedSize();
1096 if (packSize - _progress_Pack >= (1u << 24)
1097 || _writtenFileSize - _progress_Unpack >= (1u << 26))
1098 {
1099 _progress_Pack = packSize;
1100 _progress_Unpack = _writtenFileSize;
1101 RINOK(_progress->SetRatioInfo(&_progress_Pack, &_writtenFileSize))
1102 }
1103 // printf("\ntable read pos=%p packSize=%p _writtenFileSize = %p\n", (size_t)_winPos, (size_t)packSize, (size_t)_writtenFileSize);
1104 }
1105
1106 // _bitStream is aligned already
1107 _bitStream.Prepare();
1108 {
1109 const unsigned flags = _bitStream.ReadByte_InAligned();
1110 /* ((flags & 20) == 0) in all rar archives now,
1111 but (flags & 20) flag can be used as some decoding hint in future versions of original rar.
1112 So we ignore that bit here. */
1113 unsigned checkSum = _bitStream.ReadByte_InAligned();
1114 checkSum ^= flags;
1115 const unsigned num = (flags >> 3) & 3;
1116 if (num >= 3)
1117 return S_FALSE;
1118 UInt32 blockSize = _bitStream.ReadByte_InAligned();
1119 checkSum ^= blockSize;
1120 if (num != 0)
1121 {
1122 {
1123 const unsigned b = _bitStream.ReadByte_InAligned();
1124 checkSum ^= b;
1125 blockSize += (UInt32)b << 8;
1126 }
1127 if (num > 1)
1128 {
1129 const unsigned b = _bitStream.ReadByte_InAligned();
1130 checkSum ^= b;
1131 blockSize += (UInt32)b << 16;
1132 }
1133 }
1134 if (checkSum != 0x5A)
1135 return S_FALSE;
1136 unsigned blockSizeBits7 = (flags & 7) + 1;
1137 blockSize += (UInt32)(blockSizeBits7 >> 3);
1138 if (blockSize == 0)
1139 {
1140 // it's error in data stream
1141 // but original-unrar ignores that error
1142 _bitStream._minorError = true;
1143 #if 1
1144 // we ignore that error as original-unrar:
1145 blockSizeBits7 = 0;
1146 blockSize = 1;
1147 #else
1148 // we can stop decoding:
1149 return S_FALSE;
1150 #endif
1151 }
1152 blockSize--;
1153 blockSizeBits7 &= 7;
1154 PRINT_CNT("Blocks", 100)
1155 /*
1156 {
1157 static unsigned g_prev = 0;
1158 static unsigned g_cnt = 0;
1159 unsigned proc = unsigned(_winPos);
1160 if (g_cnt++ % 100 == 0) printf(" c_size = %8u ", blockSize);
1161 if (g_cnt++ % 100 == 1) printf(" unp_size = %8u", proc - g_prev);
1162 g_prev = proc;
1163 }
1164 */
1165 _bitStream._blockEndBits7 = blockSizeBits7;
1166 _bitStream._blockEnd = _bitStream.GetProcessedSize_Round() + blockSize;
1167 _bitStream.SetCheck_forBlock();
1168 _isLastBlock = ((flags & 0x40) != 0);
1169 if ((flags & 0x80) == 0)
1170 {
1171 if (!_tableWasFilled)
1172 // if (blockSize != 0 || blockSizeBits7 != 0)
1173 if (blockSize + blockSizeBits7 != 0)
1174 return S_FALSE;
1175 return S_OK;
1176 }
1177 _tableWasFilled = false;
1178 }
1179
1180 PRINT_CNT("Tables", 100);
1181
1182 const unsigned kLevelTableSize = 20;
1183 const unsigned k_NumHufTableBits_Level = 6;
1184 NHuffman::CDecoder256<kNumHufBits, kLevelTableSize, k_NumHufTableBits_Level> m_LevelDecoder;
1185 const unsigned kTablesSizesSum_MAX = kMainTableSize + kDistTableSize_MAX + kAlignTableSize + kLenTableSize;
1186 Byte lens[kTablesSizesSum_MAX];
1187 {
1188 // (kLevelTableSize + 16 < kTablesSizesSum). So we use lens[] array for (Level) table
1189 // Byte lens2[kLevelTableSize + 16];
1190 unsigned i = 0;
1191 do
1192 {
1193 if (_bitStream._buf >= _bitStream._bufCheck_Block)
1194 {
1195 _bitStream.Prepare();
1196 if (_bitStream.IsBlockOverRead())
1197 return S_FALSE;
1198 }
1199 const unsigned len = (unsigned)_bitStream.ReadBits_9fix(4);
1200 if (len == 15)
1201 {
1202 unsigned num = (unsigned)_bitStream.ReadBits_9fix(4);
1203 if (num != 0)
1204 {
1205 num += 2;
1206 num += i;
1207 // we are allowed to overwrite to lens[] for extra 16 bytes after kLevelTableSize
1208 #if 0
1209 if (num > kLevelTableSize)
1210 {
1211 // we ignore this error as original-unrar
1212 num = kLevelTableSize;
1213 // return S_FALSE;
1214 }
1215 #endif
1216 do
1217 lens[i++] = 0;
1218 while (i < num);
1219 continue;
1220 }
1221 }
1222 lens[i++] = (Byte)len;
1223 }
1224 while (i < kLevelTableSize);
1225 if (_bitStream.IsBlockOverRead())
1226 return S_FALSE;
1227 RIF(m_LevelDecoder.Build(lens, NHuffman::k_BuildMode_Full))
1228 }
1229
1230 unsigned i = 0;
1231 const unsigned tableSize = _is_v7 ?
1232 kTablesSizesSum_MAX :
1233 kTablesSizesSum_MAX - kExtraDistSymbols_v7;
1234 do
1235 {
1236 if (_bitStream._buf >= _bitStream._bufCheck_Block)
1237 {
1238 // if (_bitStream._buf >= _bitStream._bufCheck)
1239 _bitStream.Prepare();
1240 if (_bitStream.IsBlockOverRead())
1241 return S_FALSE;
1242 }
1243 const unsigned sym = m_LevelDecoder.DecodeFull(&_bitStream);
1244 if (sym < 16)
1245 lens[i++] = (Byte)sym;
1246 #if 0
1247 else if (sym > kLevelTableSize)
1248 return S_FALSE;
1249 #endif
1250 else
1251 {
1252 unsigned num = ((sym /* - 16 */) & 1) * 4;
1253 num += num + 3 + (unsigned)_bitStream.ReadBits9(num + 3);
1254 num += i;
1255 if (num > tableSize)
1256 {
1257 // we ignore this error as original-unrar
1258 num = tableSize;
1259 // return S_FALSE;
1260 }
1261 unsigned v = 0;
1262 if (sym < 16 + 2)
1263 {
1264 if (i == 0)
1265 return S_FALSE;
1266 v = lens[(size_t)i - 1];
1267 }
1268 do
1269 lens[i++] = (Byte)v;
1270 while (i < num);
1271 }
1272 }
1273 while (i < tableSize);
1274
1275 if (_bitStream.IsBlockOverRead())
1276 return S_FALSE;
1277 if (_bitStream.InputEofError())
1278 return S_FALSE;
1279
1280 /* We suppose that original-rar encoder can create only two cases for Huffman:
1281 1) Empty Huffman tree (if num_used_symbols == 0)
1282 2) Full Huffman tree (if num_used_symbols != 0)
1283 Usually the block contains at least one symbol for m_MainDecoder.
1284 So original-rar-encoder creates full Huffman tree for m_MainDecoder.
1285 But we suppose that (num_used_symbols == 0) is possible for m_MainDecoder,
1286 because file must be finished with (_isLastBlock) flag,
1287 even if there are no symbols in m_MainDecoder.
1288 So we use k_BuildMode_Full_or_Empty for m_MainDecoder.
1289 */
1290 const NHuffman::enum_BuildMode buildMode = NHuffman::
1291 k_BuildMode_Full_or_Empty; // strict check
1292 // k_BuildMode_Partial; // non-strict check (ignore errors)
1293
1294 RIF(m_MainDecoder.Build(&lens[0], buildMode))
1295 if (!_is_v7)
1296 {
1297 #if 1
1298 /* we use this manual loop to avoid compiler BUG.
1299 GCC 4.9.2 compiler has BUG with overlapping memmove() to right in local array. */
1300 Byte *dest = lens + kMainTableSize + kDistTableSize_v6 +
1301 kAlignTableSize + kLenTableSize - 1;
1302 unsigned num = kAlignTableSize + kLenTableSize;
1303 do
1304 {
1305 dest[kExtraDistSymbols_v7] = dest[0];
1306 dest--;
1307 }
1308 while (--num);
1309 #else
1310 memmove(lens + kMainTableSize + kDistTableSize_v6 + kExtraDistSymbols_v7,
1311 lens + kMainTableSize + kDistTableSize_v6,
1312 kAlignTableSize + kLenTableSize);
1313 #endif
1314 memset(lens + kMainTableSize + kDistTableSize_v6, 0, kExtraDistSymbols_v7);
1315 }
1316
1317 RIF(m_DistDecoder.Build(&lens[kMainTableSize], buildMode))
1318 RIF( m_LenDecoder.Build(&lens[kMainTableSize
1319 + kDistTableSize_MAX + kAlignTableSize], buildMode))
1320
1321 _useAlignBits = false;
1322 for (i = 0; i < kAlignTableSize; i++)
1323 if (lens[kMainTableSize + kDistTableSize_MAX + (size_t)i] != kNumAlignBits)
1324 {
1325 RIF(m_AlignDecoder.Build(&lens[kMainTableSize + kDistTableSize_MAX], buildMode))
1326 _useAlignBits = true;
1327 break;
1328 }
1329
1330 _tableWasFilled = true;
1331 return S_OK;
1332 }
1333
SlotToLen(CBitDecoder & _bitStream,CLenType slot)1334 static inline CLenType SlotToLen(CBitDecoder &_bitStream, CLenType slot)
1335 {
1336 const unsigned numBits = ((unsigned)slot >> 2) - 1;
1337 return ((4 | (slot & 3)) << numBits) + (CLenType)_bitStream.ReadBits9(numBits);
1338 }
1339
1340
// First "repeat distance" symbol of the main table:
// DecodeLZ2() treats symbols in [kSymbolRep, kSymbolRep + kNumReps) as reuse of _reps[].
static const unsigned kSymbolRep = 258;
// Same value as MAX_INC_LZ_MATCH quoted in the note at the top of this file.
// Code() reserves that many bytes (plus 64) after the window in the allocation.
static const unsigned kMaxMatchLen = 0x1001 + 3;

// Value of (_exitType): tells DecodeLZ() why DecodeLZ2() returned.
enum enum_exit_type
{
  Z7_RAR_EXIT_TYPE_NONE,       // set at start of DecodeLZ2(): no special action requested
  Z7_RAR_EXIT_TYPE_ADD_FILTER  // main symbol 256 was decoded: DecodeLZ() calls AddFilter()
};


// Stores the local state of DecodeLZ2() (rep0, window position, bit stream position)
// back to members of the object. DecodeLZ() reads (_winPos, _buf_Res, _bitPos_Res)
// after DecodeLZ2() returns.
#define LZ_RESTORE \
  { \
    _reps[0] = rep0; \
    _winPos = (size_t)(winPos - _window); \
    _buf_Res = _bitStream._buf; \
    _bitPos_Res = _bitStream._bitPos; \
  }

// Normal exit from the main loop of DecodeLZ2(): falls to LZ_RESTORE and (return S_OK).
#define LZ_LOOP_BREAK_OK  { break; }
// #define LZ_LOOP_BREAK_ERROR { _lzError = LZ_ERROR_TYPE_SYM; break; }
// #define LZ_LOOP_BREAK_ERROR { LZ_RESTORE; return S_FALSE; }
// Error exit from DecodeLZ2(): jumps to (decode_error) label, that does LZ_RESTORE and (return S_FALSE).
#define LZ_LOOP_BREAK_ERROR { goto decode_error; }
// goto decode_error; }
// #define LZ_LOOP_BREAK_ERROR { break; }

// Wrapper for Z7_HUFF_DECODE_CHECK (defined elsewhere) that passes kNumHufBits
// and uses LZ_LOOP_BREAK_ERROR as the action argument.
// NOTE(review): presumably that action is executed for invalid Huffman code - confirm in Z7_HUFF_DECODE_CHECK.
#define Z7_RAR_HUFF_DECODE_CHECK_break(sym, huf, kNumTableBits, bitStream) \
    Z7_HUFF_DECODE_CHECK(sym, huf, kNumHufBits, kNumTableBits, bitStream, { LZ_LOOP_BREAK_ERROR })
1368
1369
/*
  DecodeLZ2() is the inner LZ decoding loop.
  It works with local copy of (bitStream) and decodes symbols to the window,
  until one of the following events:
    - (winPos >= _limit)
    - input check: InputEofError(), or input buffer must be refilled,
      or end of current block was reached (the caller processes these cases)
    - main symbol 256 was decoded: (_exitType = Z7_RAR_EXIT_TYPE_ADD_FILTER)
    - error reported via LZ_LOOP_BREAK_ERROR
  Before return it calls LZ_RESTORE, that writes
    (_reps[0], _winPos, _buf_Res, _bitPos_Res).
  The function doesn't call _bitStream.Prepare(). So it doesn't read from input stream here.
  Returns:
    S_OK    : for any normal exit from the loop
    S_FALSE : for (decode_error) exit
  Incorrect match distance is not returned as error: the code sets
  (_lzError = LZ_ERROR_TYPE_DIST), writes zeros instead of match and continues decoding.
*/
HRESULT CDecoder::DecodeLZ2(const CBitDecoder &bitStream) throw()
{
#if 0
  Byte k_LenPlusTable_LOC[DICT_SIZE_BITS_MAX];
  memcpy(k_LenPlusTable_LOC, k_LenPlusTable, sizeof(k_LenPlusTable));
#endif

  PRINT_CNT("DecodeLZ2", 2000);

  // local copy of bit decoder state. The caller gets new position via (_buf_Res, _bitPos_Res).
  CBitDecoder _bitStream;
  _bitStream.CopyFrom(bitStream);
  // _bitStream._stream = _inStream;
  // _bitStream._bufBase = _inputBuf;
  // _bitStream.Init();

  // _reps[*] can be larger than _winSize, if _winSize was reduced in solid stream.
  // rep0 is local copy of _reps[0]. It's stored back in LZ_RESTORE.
  size_t rep0 = _reps[0];
  // size_t rep1 = _reps[1];
  // Byte *win = _window;
  Byte *winPos = _window + _winPos;
  const Byte *limit = _window + _limit;
  _exitType = Z7_RAR_EXIT_TYPE_NONE;

  for (;;)
  {
    // the caller must flush the window / set new limit
    if (winPos >= limit)
      LZ_LOOP_BREAK_OK
    // (winPos < limit)
    if (_bitStream._buf >= _bitStream._bufCheck_Block)
    {
      if (_bitStream.InputEofError())
        LZ_LOOP_BREAK_OK
      if (_bitStream._buf >= _bitStream._bufCheck)
      {
        // the caller must call Prepare() to get more input data
        if (!_bitStream._wasFinished)
          LZ_LOOP_BREAK_OK
        // _bitStream._wasFinished == true
        // we don't need Prepare() here, because all data was read
        // and PadZone (16 bytes) after data was filled.
      }
      const UInt64 processed = _bitStream.GetProcessedSize_Round();
      // some cases are error, but the caller will process such error cases.
      // we exit, if position is at or after the end of block (_blockEnd : _blockEndBits7).
      if (processed >= _bitStream._blockEnd &&
          (processed > _bitStream._blockEnd
            || _bitStream.GetProcessedBits7() >= _bitStream._blockEndBits7))
        LZ_LOOP_BREAK_OK
      // that check is not required, but it can help, if there is BUG in another code
      if (!_tableWasFilled)
        LZ_LOOP_BREAK_ERROR
    }

#if 0
    const unsigned sym = m_MainDecoder.Decode(&_bitStream);
#else
    unsigned sym;
    Z7_RAR_HUFF_DECODE_CHECK_break(sym, &m_MainDecoder, k_NumHufTableBits_Main, &_bitStream)
#endif

    // (sym < 256) : literal byte
    if (sym < 256)
    {
      *winPos++ = (Byte)sym;
      // _lzSize++;
      continue;
    }

    CLenType len;

    /*
      (sym >= 256) cases:
        sym == 256                                      : filter
        256 < sym < kSymbolRep                          : match with previous (_lastLen) and current (rep0)
        kSymbolRep <= sym < kSymbolRep + kNumReps       : match with distance from _reps[], new length
        sym >= kSymbolRep + kNumReps                    : match with new length and new distance
    */
    if (sym < kSymbolRep + kNumReps)
    {
      if (sym >= kSymbolRep)
      {
        if (sym != kSymbolRep)
        {
          // the selected distance is moved to (rep0), and older items are shifted.
          size_t dist = _reps[1];
          _reps[1] = rep0;
          rep0 = dist;
          if (sym >= kSymbolRep + 2)
          {
#if 1
            // for (sym == kSymbolRep + 2) the middle assignment is (_reps[2] = _reps[2])
            rep0 = _reps[(size_t)sym - kSymbolRep];
            _reps[(size_t)sym - kSymbolRep] = _reps[2];
            _reps[2] = dist;
#else
            if (sym != kSymbolRep + 2)
            {
              rep0 = _reps[3];
              _reps[3] = _reps[2];
              _reps[2] = dist;
            }
            else
            {
              rep0 = _reps[2];
              _reps[2] = dist;
            }
#endif
          }
        }
#if 0
        len = m_LenDecoder.Decode(&_bitStream);
        if (len >= kLenTableSize)
          LZ_LOOP_BREAK_ERROR
#else
        Z7_RAR_HUFF_DECODE_CHECK_break(len, &m_LenDecoder, k_NumHufTableBits_Len, &_bitStream)
#endif
        // slots (< 8) are direct values. Larger slots have extra bits.
        if (len >= 8)
          len = SlotToLen(_bitStream, len);
        len += 2;
        // _lastLen = (UInt32)len;
      }
      else if (sym != 256)
      {
        // repeat of previous match length with current (rep0)
        len = (CLenType)_lastLen;
        if (len == 0)
        {
          // we ignore (_lastLen == 0) case, like original-unrar.
          // that case can mean error in stream.
          // lzError = true;
          // return S_FALSE;
          continue;
        }
      }
      else
      {
        // (sym == 256) : the caller (DecodeLZ) calls AddFilter() for that exit type
        _exitType = Z7_RAR_EXIT_TYPE_ADD_FILTER;
        LZ_LOOP_BREAK_OK
      }
    }
#if 0
    else if (sym >= kMainTableSize)
      LZ_LOOP_BREAK_ERROR
#endif
    else
    {
      // new distance: we shift old distances. _reps[0] will be written in LZ_RESTORE.
      _reps[3] = _reps[2];
      _reps[2] = _reps[1];
      _reps[1] = rep0;
      len = sym - (kSymbolRep + kNumReps);
      if (len >= 8)
        len = SlotToLen(_bitStream, len);
      len += 2;
      // _lastLen = (UInt32)len;

      // rep0 gets distance slot here. It's converted to (distance) below.
#if 0
      rep0 = (UInt32)m_DistDecoder.Decode(&_bitStream);
#else
      Z7_RAR_HUFF_DECODE_CHECK_break(rep0, &m_DistDecoder, k_NumHufTableBits_Dist, &_bitStream)
#endif

      // slots (< 4) are direct values: (distance = slot + 1)
      if (rep0 >= 4)
      {
#if 0
        if (rep0 >= kDistTableSize_MAX)
          LZ_LOOP_BREAK_ERROR
#endif
        // base value is ((2 | (slot & 1)) << numBits). Then (numBits) low bits are added.
        const unsigned numBits = ((unsigned)rep0 - 2) >> 1;
        rep0 = (2 | (rep0 & 1)) << numBits;

        // (v) is big-endian value loaded at current buffer position.
        // It's passed to ReadBits_Big25() / ReadBits_Big() calls below.
        const Byte *buf = _bitStream._buf;
#ifdef Z7_RAR5_USE_64BIT
        const UInt64 v = GetBe64(buf);
#else
        const UInt32 v = GetBe32(buf);
#endif

        // _lastLen = (UInt32)len;
        if (numBits < kNumAlignBits)
        {
          rep0 += // _bitStream.ReadBits9(numBits);
            _bitStream.ReadBits_Big25(numBits, v);
        }
        else
        {
          // the length is increased for big distances. The increase depends on (numBits).
          // The alternative branches below are different ways to calculate that value.
#if !defined(MY_CPU_AMD64)
          len += k_LenPlusTable[numBits];
#elif 0
          len += k_LenPlusTable_LOC[numBits];
#elif 1
          len += m_LenPlusTable[numBits];
#elif 1 && defined(MY_CPU_64BIT) && defined(MY_CPU_AMD64)
          // len += (unsigned)((UInt64)0xfffffffeaa554000 >> (numBits * 2)) & 3;
          len += (unsigned)((UInt64)0xfffffffffeaa5540 >> (numBits * 2 - 8)) & 3;
#elif 1
          len += 3;
          len -= (unsigned)(numBits - 7) >> (sizeof(unsigned) * 8 - 1);
          len -= (unsigned)(numBits - 12) >> (sizeof(unsigned) * 8 - 1);
          len -= (unsigned)(numBits - 17) >> (sizeof(unsigned) * 8 - 1);
#elif 1
          len += 3;
          len -= (0x155aabf >> (numBits - 4) >> (numBits - 4)) & 3;
#elif 1
          len += (numBits >= 7);
          len += (numBits >= 12);
          len += (numBits >= 17);
#endif
          // _lastLen = (UInt32)len;
          if (_useAlignBits)
          {
            // high (numBits - kNumAlignBits) bits are read directly,
            // and low part is decoded with m_AlignDecoder.
            // if (numBits > kNumAlignBits)
            rep0 += (_bitStream.ReadBits_Big25(numBits - kNumAlignBits, v) << kNumAlignBits);
#if 0
            const unsigned a = m_AlignDecoder.Decode(&_bitStream);
            if (a >= kAlignTableSize)
              LZ_LOOP_BREAK_ERROR
#else
            unsigned a;
            Z7_RAR_HUFF_DECODE_CHECK_break(a, &m_AlignDecoder, k_NumHufTableBits_Align, &_bitStream)
#endif
            rep0 += a;
          }
          else
            rep0 += _bitStream.ReadBits_Big(numBits, v);
#ifndef Z7_RAR5_USE_64BIT
          // after (rep0++) below the value is ((size_t)0 - 1).
          // NOTE(review): presumably that value fails (rep0 <= _dictSize_forCheck) check - confirm.
          if (numBits >= 30) // we don't want 32-bit overflow case
            rep0 = (size_t)0 - 1 - 1;
#endif
        }
      }
      rep0++;
    }

    // copy match: (len) bytes from distance (rep0)
    {
      _lastLen = (UInt32)len;
      // len != 0

#ifdef Z7_RAR5_SHOW_STAT
      {
        size_t index = rep0;
        if (index >= kNumStats1)
          index = kNumStats1 - 1;
        g_stats1[index]++;
        g_stats2[index][len]++;
      }
#endif

      // (winPos) is moved to the end of match before copying.
      // (winPos) can be larger than (limit) after that.
      Byte *dest = winPos;
      winPos += len;
      if (rep0 <= _dictSize_forCheck)
      {
        const Byte *src;
        const size_t winPos_temp = (size_t)(dest - _window);
        if (rep0 > winPos_temp)
        {
          // the match starts before the start of window buffer.
          // (_lzSize == 0) : such distance is error
          if (_lzSize == 0)
            goto error_dist;
          // (back) is number of source bytes located before the start of window buffer.
          // These bytes are read from the tail part of window: (src = dest + _winSize - rep0)
          size_t back = rep0 - winPos_temp;
          // STAT_INC(g_NumOver)
          src = dest + (_winSize - rep0);
          if (back < len)
          {
            // we copy (back) bytes from tail part of window,
            // and the rest of match is copied by CopyMatch() from start of window.
            // len -= (CLenType)back;
            Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
            do
              *dest++ = *src++;
            while (--back);
            src = dest - rep0;
          }
        }
        else
          src = dest - rep0;
        CopyMatch(rep0, dest, src, winPos);
        continue;
      }

    error_dist:
      // incorrect distance: we set error flag, write zeros for [dest, winPos) and continue decoding.
      // LZ_LOOP_BREAK_ERROR;
      _lzError = LZ_ERROR_TYPE_DIST;
      do
        *dest++ = 0;
      while (dest < winPos);
      continue;
    }
  }

  LZ_RESTORE
  return S_OK;

#if 1
decode_error:
  /*
  if (_bitStream._hres != S_OK)
    return _bitStream._hres;
  */
  LZ_RESTORE
  return S_FALSE;
#endif
}
1656
1657
1658
/*
  DecodeLZ() is the outer decoding loop for one Code() call.
  It creates bit decoder for (_inStream / _inputBuf), and repeats the following steps:
    1) if (winPos >= limit) : WriteBuf(), optional wrap of window, new (limit)
    2) if input check position was reached : Prepare(), check for the end of block,
       ReadTables() for next block, or return at the end of last block
    3) DecodeLZ2() for symbols decoding, and AddFilter(), if DecodeLZ2() requested it
  Returns:
    at the end of last block: S_FALSE, if (_bitStream._minorError), or (_bitStream._hres) otherwise.
    after (break) from the loop: (_bitStream._hres), if it's not S_OK, or S_FALSE otherwise.
    error codes from WriteBuf(), ReadTables(), DecodeLZ2(), AddFilter() are passed via RINOK.
*/
HRESULT CDecoder::DecodeLZ()
{
  CBitDecoder _bitStream;
  _bitStream._stream = _inStream;
  _bitStream._bufBase = _inputBuf;
  _bitStream.Init();

  // _reps[*] can be larger than _winSize, if _winSize was reduced in solid stream.
  size_t winPos = _winPos;
  Byte *win = _window;
  // (limit) is the window position, where DecodeLZ2() must stop.
  // The distance to (limit) is not larger than kWriteStep, and (limit <= _winSize).
  size_t limit;
  {
    size_t rem = _winSize - winPos;
    if (rem > kWriteStep)
        rem = kWriteStep;
    limit = winPos + rem;
  }

  for (;;)
  {
    if (winPos >= limit)
    {
      // (winPos) can be larger than (_winSize), because DecodeLZ2() can write match after (limit).
      // So we clamp the value for WriteBuf().
      _winPos = winPos < _winSize ? winPos : _winSize;
      RINOK(WriteBuf())
      if (_unpackSize_Defined && _writtenFileSize > _unpackSize)
        break; // return S_FALSE;
      const size_t wp = _winPos;
      size_t rem = _winSize - wp;
      if (rem == 0)
      {
        // end of window was reached: we move to start of window.
        // The bytes that were written after (_winSize) are copied to start of window.
        _lzSize += wp;
        winPos -= wp;
        // (winPos < kMaxMatchLen < _winSize)
        // so memmove is not required here
        if (winPos)
          memcpy(win, win + _winSize, winPos);
        limit = _winSize;
        if (limit >= kWriteStep)
        {
          limit = kWriteStep;
          continue;
        }
        rem = _winSize - winPos;
      }
      if (rem > kWriteStep)
          rem = kWriteStep;
      limit = winPos + rem;
      continue;
    }

    // (winPos < limit)

    if (_bitStream._buf >= _bitStream._bufCheck_Block)
    {
      _winPos = winPos;
      if (_bitStream.InputEofError())
        break; // return S_FALSE;
      _bitStream.Prepare();

      // we compare current position with the end of block (_blockEnd : _blockEndBits7)
      const UInt64 processed = _bitStream.GetProcessedSize_Round();
      if (processed >= _bitStream._blockEnd)
      {
        if (processed > _bitStream._blockEnd)
          break; // return S_FALSE;
        {
          const unsigned bits7 = _bitStream.GetProcessedBits7();
          if (bits7 >= _bitStream._blockEndBits7)
          {
            if (bits7 > _bitStream._blockEndBits7)
            {
#if 1
              // we ignore that error as original unrar
              // but S_FALSE will be returned at the end of last block for that flag.
              _bitStream._minorError = true;
#else
              break; // return S_FALSE;
#endif
            }
            _bitStream.AlignToByte();
            // if (!_bitStream.AlignToByte()) break;
            if (_isLastBlock)
            {
              // it's the only place in this function, where S_OK can be returned
              // (if _bitStream._hres is S_OK).
              if (_bitStream.InputEofError())
                break;
              /*
              // packSize can be 15 bytes larger for encrypted archive
              if (_packSize_Defined && _packSize < _bitStream.GetProcessedSize())
                break;
              */
              if (_bitStream._minorError)
                return S_FALSE;
              return _bitStream._hres;
              // break;
            }
            // the block was finished, and it's not last block: we read header of next block
            RINOK(ReadTables(_bitStream))
            continue;
          }
        }
      }

      // end of block was not reached.
      // so we must decode more symbols
      // that check is not required, but it can help, if there is BUG in another code
      if (!_tableWasFilled)
        break; // return S_FALSE;
    }

    // DecodeLZ2() reads (_limit, _winPos), and it returns new state
    // in (_winPos, _buf_Res, _bitPos_Res, _exitType).
    _limit = limit;
    _winPos = winPos;
    RINOK(DecodeLZ2(_bitStream))
    _bitStream._buf = _buf_Res;
    _bitStream._bitPos = _bitPos_Res;

    winPos = _winPos;
    if (_exitType == Z7_RAR_EXIT_TYPE_ADD_FILTER)
    {
      RINOK(AddFilter(_bitStream))
      continue;
    }
  }

  // we are here after (break): it's error case.
  _winPos = winPos;

  if (_bitStream._hres != S_OK)
    return _bitStream._hres;

  return S_FALSE;
}
1786
1787
1788
CodeReal()1789 HRESULT CDecoder::CodeReal()
1790 {
1791 _unsupportedFilter = false;
1792 _writeError = false;
1793 /*
1794 if (!_isSolid || !_wasInit)
1795 {
1796 _wasInit = true;
1797 // _lzSize = 0;
1798 _lzWritten = 0;
1799 _winPos = 0;
1800 for (unsigned i = 0; i < kNumReps; i++)
1801 _reps[i] = (size_t)0 - 1;
1802 _lastLen = 0;
1803 _tableWasFilled = false;
1804 }
1805 */
1806 _isLastBlock = false;
1807
1808 InitFilters();
1809
1810 _filterEnd = 0;
1811 _writtenFileSize = 0;
1812 const UInt64 lzSize = _lzSize + _winPos;
1813 _lzFileStart = lzSize;
1814 _lzWritten = lzSize;
1815
1816 HRESULT res = DecodeLZ();
1817
1818 HRESULT res2 = S_OK;
1819 if (!_writeError && res != E_OUTOFMEMORY)
1820 res2 = WriteBuf();
1821 /*
1822 if (res == S_OK)
1823 if (InputEofError())
1824 res = S_FALSE;
1825 */
1826 if (res == S_OK)
1827 {
1828 // _solidAllowed = true;
1829 res = res2;
1830 }
1831 if (res == S_OK && _unpackSize_Defined && _writtenFileSize != _unpackSize)
1832 return S_FALSE;
1833 return res;
1834 }
1835
1836
1837
/*
  Code() decodes one file from (inStream) to (outStream).
    inStream  : packed data
    outStream : unpacked data
    (inSize)  : is not used
    outSize   : optional size of unpacked data. If it's NULL, the size is unknown.
    progress  : optional progress callback (it's stored to _progress)
  Steps:
    1) reset of LZ state for non-solid mode (or recovering of window for solid mode)
    2) allocation of window and input buffer
    3) CodeReal()
  Returns:
    S_OK          : no errors
    S_FALSE       : data error, or dictionary size was increased in solid stream
    E_OUTOFMEMORY : allocation error, or overflow in window size calculation
    E_NOTIMPL     : unsupported filter (_unsupportedFilter)
    or another error code from CodeReal()
*/
Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream,
    const UInt64 * /* inSize */, const UInt64 *outSize, ICompressProgressInfo *progress))
{
  _lzError = LZ_ERROR_TYPE_NO;
/*
  if file is solid, but decoding of previous file was not finished,
  we still try to decode new file.
  We need correct huffman table at starting block.
  And rar encoder probably writes huffman table at start block, if file is big.
  So we have good chance to get correct huffman table in some file after corruption.
  Also we try to recover window by filling zeros, if previous file
  was decoded to smaller size than required.
  But if filling size is big, we do full reset of window instead.
*/
  // maximum size of the gap that is filled with zeros (instead of full reset)
  #define Z7_RAR_RECOVER_SOLID_LIMIT (1 << 20)
  // #define Z7_RAR_RECOVER_SOLID_LIMIT 0 // do not fill zeros
  {
    // if (_winPos > 100) _winPos -= 100; // for debug: corruption
    // (lzSize) is current LZ position. (_lzEnd) is expected position after previous file.
    const UInt64 lzSize = _lzSize + _winPos;
    if (!_isSolid || !_wasInit
        || (lzSize < _lzEnd
          #if Z7_RAR_RECOVER_SOLID_LIMIT != 0
             && lzSize + Z7_RAR_RECOVER_SOLID_LIMIT < _lzEnd
          #endif
        ))
    {
      // full reset of LZ state.
      // For solid mode it means that the previous data is lost. So we set error flag.
      if (_isSolid)
        _lzError = LZ_ERROR_TYPE_HEADER;
      _lzEnd = 0;
      _lzSize = 0;
      _lzWritten = 0;
      _winPos = 0;
      for (unsigned i = 0; i < kNumReps; i++)
        _reps[i] = (size_t)0 - 1;
      _lastLen = 0;
      _tableWasFilled = false;
      _wasInit = true;
    }
    #if Z7_RAR_RECOVER_SOLID_LIMIT != 0
    else if (lzSize < _lzEnd)
    {
     #if 0
      return S_FALSE;
     #else
      // we can report that recovering was made:
      // _lzError = LZ_ERROR_TYPE_HEADER;
      // We write zeros to area after corruption:
      if (_window)
      {
        // (rem) is the number of missing bytes. Here (rem <= Z7_RAR_RECOVER_SOLID_LIMIT).
        UInt64 rem = _lzEnd - lzSize;
        const size_t ws = _winSize;
        if (rem >= ws)
        {
          My_ZeroMemory(_window, ws);
          _lzSize = ws;
          _winPos = 0;
        }
        else
        {
          // (cur) is the number of bytes up to the end of window
          const size_t cur = ws - _winPos;
          if (cur <= rem)
          {
            // we fill the tail of window, and then continue from the start of window.
            rem -= cur;
            My_ZeroMemory(_window + _winPos, cur);
            // NOTE(review): (_winPos) is added here instead of (ws). So (_lzSize + _winPos)
            // after this branch is (cur) bytes smaller than old (_lzEnd).
            // (_lzEnd) is recalculated from new values after this block - confirm that it's intended.
            _lzSize += _winPos;
            _winPos = 0;
          }
          My_ZeroMemory(_window + _winPos, (size_t)rem);
          _winPos += (size_t)rem;
        }
      }
      // else return S_FALSE;
     #endif
    }
    #endif
  }

  // we don't want _lzSize overflow
  if (_lzSize >= DICT_SIZE_MAX)
      _lzSize  = DICT_SIZE_MAX;
  _lzEnd = _lzSize + _winPos;
  // _lzSize <= DICT_SIZE_MAX
  // _lzEnd  <= DICT_SIZE_MAX * 2

  size_t newSize = _dictSize;
  if (newSize < kWinSize_Min)
      newSize = kWinSize_Min;

  _unpackSize = 0;
  _unpackSize_Defined = (outSize != NULL);
  if (_unpackSize_Defined)
    _unpackSize = *outSize;

  // (_lzEnd) is checked at the start of next Code() call for solid stream.
  // Value with the highest bit set is processed as unknown size.
  if ((Int64)_unpackSize >= 0)
    _lzEnd += _unpackSize; // known end after current file
  else
    _lzEnd = 0; // unknown end

  if (_isSolid && _window)
  {
    // If dictionary was decreased in solid, we use old dictionary.
    if (newSize > _dictSize_forCheck)
    {
      // If dictionary was increased in solid, we don't want grow.
      return S_FALSE; // E_OUTOFMEMORY
    }
    // (newSize <= _winSize)
  }
  else
  {
    _dictSize_forCheck = newSize;
    {
      // window size is rounded up to the multiple of (k_Win_AlignSize).
      // (newSize < newSize_small) means overflow of (size_t).
      size_t newSize_small = newSize;
      const size_t k_Win_AlignSize = 1u << 18;
      /* here we add (1 << 7) instead of (COPY_CHUNK_SIZE - 1), because
         we want to get same (_winSize) for different COPY_CHUNK_SIZE values. */
      // newSize += (COPY_CHUNK_SIZE - 1) + (k_Win_AlignSize - 1); // for debug : we can get smallest (_winSize)
      newSize += (1 << 7) + k_Win_AlignSize;
      newSize &= ~(size_t)(k_Win_AlignSize - 1);
      if (newSize < newSize_small)
        return E_OUTOFMEMORY;
    }
    // (!_isSolid || !_window)
    // additional bytes after window are required, because the decoder can write
    // the match after the end of window (DecodeLZ() moves such bytes to start of window).
    const size_t allocSize = newSize + kMaxMatchLen + 64;
    if (allocSize < newSize)
      return E_OUTOFMEMORY;
    // we reuse old buffer, if it's big enough
    if (!_window || allocSize > _winSize_Allocated)
    {
      Z7_RAR_FREE_WINDOW
      _window = NULL;
      _winSize_Allocated = 0;
      Byte *win = (Byte *)::BigAlloc(allocSize);
      if (!win)
        return E_OUTOFMEMORY;
      _window = win;
      _winSize_Allocated = allocSize;
    }
    _winSize = newSize;
  }

  // input buffer is allocated once and it's reused for next calls
  if (!_inputBuf)
  {
    _inputBuf = (Byte *)z7_AlignedAlloc(kInputBufSize + kInputBufferPadZone);
    if (!_inputBuf)
      return E_OUTOFMEMORY;
  }

  _inStream = inStream;
  _outStream = outStream;
  _progress = progress;
  _progress_Pack = 0;
  _progress_Unpack = 0;

  const HRESULT res = CodeReal();

  if (res != S_OK)
    return res;
  // errors that didn't stop the decoding are reported here:
  // _lzError = LZ_ERROR_TYPE_HEADER; // for debug
  if (_lzError)
    return S_FALSE;
  if (_unsupportedFilter)
    return E_NOTIMPL;
  return S_OK;
}
2002
2003
/*
  SetDecoderProperties2() parses 2 bytes of decoder properties:
    data[0]       : power value for dictionary size
    data[1] bit 0 : solid flag
    data[1] bit 1 : v7 flag
    data[1] >> 3  : fraction value for dictionary size (5 bits)
  dictionary size = (fraction + 32) << (power + 12)
  Returns:
    E_INVALIDARG : (size != 2)
    E_NOTIMPL    : dictionary size is larger than supported (MAX_DICT_LOG)
    S_OK         : properties were stored to (_dictSize, _isSolid, _is_v7)
*/
Z7_COM7F_IMF(CDecoder::SetDecoderProperties2(const Byte *data, UInt32 size))
{
  if (size != 2)
    return E_INVALIDARG;

  const unsigned dictLog = data[0];
  const unsigned flags = data[1];
  const unsigned dictFrac = flags >> 3;

  // fracCarry is 0 for (dictFrac == 0), and it's 1 for (dictFrac != 0):
  // non-zero fraction needs one additional bit for the size value.
  const unsigned fracCarry = (dictFrac + 31) >> 5;
  if (dictLog + fracCarry > MAX_DICT_LOG - 17)
    return E_NOTIMPL;

  _dictSize = (size_t)(dictFrac + 32) << (dictLog + 12);
  _isSolid = ((flags & 1) != 0);
  _is_v7 = ((flags & 2) != 0);
  return S_OK;
}
2022
2023 }}
2024