xref: /aosp_15_r20/external/lzma/CPP/7zip/Compress/LzmsDecoder.cpp (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 // LzmsDecoder.cpp
2 // The code is based on LZMS description from wimlib code
3 
4 #include "StdAfx.h"
5 
6 #include "../../../C/Alloc.h"
7 
8 #include "LzmsDecoder.h"
9 
10 namespace NCompress {
11 namespace NLzms {
12 
13 class CBitDecoder
14 {
15 public:
16   const Byte *_buf;
17   unsigned _bitPos;
18 
Init(const Byte * buf,size_t size)19   void Init(const Byte *buf, size_t size) throw()
20   {
21     _buf = buf + size;
22     _bitPos = 0;
23   }
24 
25   Z7_FORCE_INLINE
GetValue(unsigned numBits) const26   UInt32 GetValue(unsigned numBits) const
27   {
28     UInt32 v =
29         ((UInt32)_buf[-1] << 16) |
30         ((UInt32)_buf[-2] << 8) |
31          (UInt32)_buf[-3];
32     v >>= 24 - numBits - _bitPos;
33     return v & ((1u << numBits) - 1);
34   }
35 
36   Z7_FORCE_INLINE
GetValue_InHigh32bits()37   UInt32 GetValue_InHigh32bits()
38   {
39     return GetUi32(_buf - 4) << _bitPos;
40   }
41 
MovePos(unsigned numBits)42   void MovePos(unsigned numBits)
43   {
44     _bitPos += numBits;
45     _buf -= (_bitPos >> 3);
46     _bitPos &= 7;
47   }
48 
ReadBits32(unsigned numBits)49   UInt32 ReadBits32(unsigned numBits)
50   {
51     UInt32 mask = (((UInt32)1 << numBits) - 1);
52     numBits += _bitPos;
53     const Byte *buf = _buf;
54     UInt32 v = GetUi32(buf - 4);
55     if (numBits > 32)
56     {
57       v <<= (numBits - 32);
58       v |= (UInt32)buf[-5] >> (40 - numBits);
59     }
60     else
61       v >>= (32 - numBits);
62     _buf = buf - (numBits >> 3);
63     _bitPos = numBits & 7;
64     return v & mask;
65   }
66 };
67 
68 static UInt32 g_PosBases[k_NumPosSyms /* + 1 */];
69 
70 static Byte g_PosDirectBits[k_NumPosSyms];
71 
72 static const Byte k_PosRuns[31] =
73 {
74   8, 0, 9, 7, 10, 15, 15, 20, 20, 30, 33, 40, 42, 45, 60, 73,
75   80, 85, 95, 105, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
76 };
77 
78 static UInt32 g_LenBases[k_NumLenSyms];
79 
80 static const Byte k_LenDirectBits[k_NumLenSyms] =
81 {
82   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2,
84   2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 6,
85   7, 8, 9, 10, 16, 30,
86 };
87 
88 static struct CInit
89 {
CInitNCompress::NLzms::CInit90   CInit()
91   {
92     {
93       unsigned sum = 0;
94       for (unsigned i = 0; i < sizeof(k_PosRuns); i++)
95       {
96         unsigned t = k_PosRuns[i];
97         for (unsigned y = 0; y < t; y++)
98           g_PosDirectBits[sum + y] = (Byte)i;
99         sum += t;
100       }
101     }
102     {
103       UInt32 sum = 1;
104       for (unsigned i = 0; i < k_NumPosSyms; i++)
105       {
106         g_PosBases[i] = sum;
107         sum += (UInt32)1 << g_PosDirectBits[i];
108       }
109       // g_PosBases[k_NumPosSyms] = sum;
110     }
111     {
112       UInt32 sum = 1;
113       for (unsigned i = 0; i < k_NumLenSyms; i++)
114       {
115         g_LenBases[i] = sum;
116         sum += (UInt32)1 << k_LenDirectBits[i];
117       }
118     }
119   }
120 } g_Init;
121 
GetNumPosSlots(size_t size)122 static unsigned GetNumPosSlots(size_t size)
123 {
124   if (size < 2)
125     return 0;
126 
127   size--;
128 
129   if (size >= g_PosBases[k_NumPosSyms - 1])
130     return k_NumPosSyms;
131   unsigned left = 0;
132   unsigned right = k_NumPosSyms;
133   for (;;)
134   {
135     const unsigned m = (left + right) / 2;
136     if (left == m)
137       return m + 1;
138     if (size >= g_PosBases[m])
139       left = m;
140     else
141       right = m;
142   }
143 }
144 
145 
146 static const Int32 k_x86_WindowSize = 65535;
147 static const Int32 k_x86_TransOffset = 1023;
148 
149 static const size_t k_x86_HistorySize = 1 << 16;
150 
x86_Filter(Byte * data,UInt32 size,Int32 * history)151 static void x86_Filter(Byte *data, UInt32 size, Int32 *history)
152 {
153   if (size <= 17)
154     return;
155 
156   Byte isCode[256];
157   memset(isCode, 0, 256);
158   isCode[0x48] = 1;
159   isCode[0x4C] = 1;
160   isCode[0xE8] = 1;
161   isCode[0xE9] = 1;
162   isCode[0xF0] = 1;
163   isCode[0xFF] = 1;
164 
165   {
166     for (size_t i = 0; i < k_x86_HistorySize; i++)
167       history[i] = -(Int32)k_x86_WindowSize - 1;
168   }
169 
170   size -= 16;
171   const unsigned kSave = 6;
172   const Byte savedByte = data[(size_t)size + kSave];
173   data[(size_t)size + kSave] = 0xE8;
174   Int32 last_x86_pos = -k_x86_TransOffset - 1;
175 
176   // first byte is ignored
177   Int32 i = 0;
178 
179   for (;;)
180   {
181     Byte *p = data + (UInt32)i;
182 
183     for (;;)
184     {
185       if (isCode[*(++p)]) break;
186       if (isCode[*(++p)]) break;
187     }
188 
189     i = (Int32)(p - data);
190     if ((UInt32)i >= size)
191       break;
192 
193     UInt32 codeLen;
194 
195     Int32 maxTransOffset = k_x86_TransOffset;
196 
197     const Byte b = p[0];
198 
199     if ((b & 0x80) == 0) // REX (0x48 or 0x4c)
200     {
201       const unsigned b2 = p[2] - 0x5; // [RIP + disp32]
202       if (b2 & 0x7)
203         continue;
204       if (p[1] != 0x8d) // LEA
205       {
206         if (p[1] != 0x8b || b != 0x48 || (b2 & 0xf7))
207           continue;
208         // MOV RAX / RCX, [RIP + disp32]
209       }
210       codeLen = 3;
211     }
212     else if (b == 0xE8)
213     {
214       // CALL
215       codeLen = 1;
216       maxTransOffset /= 2;
217     }
218     else if (b == 0xE9)
219     {
220       // JUMP
221       i += 4;
222       continue;
223     }
224     else if (b == 0xF0)
225     {
226       if (p[1] != 0x83 || p[2] != 0x05)
227         continue;
228       // LOCK ADD [RIP + disp32], imm8
229       // LOCK ADD [disp32], imm8
230       codeLen = 3;
231     }
232     else
233     // if (b == 0xFF)
234     {
235       if (p[1] != 0x15)
236         continue;
237       // CALL [RIP + disp32];
238       // CALL [disp32];
239       codeLen = 2;
240     }
241 
242     Int32 *target;
243     {
244       Byte *p2 = p + codeLen;
245       UInt32 n = GetUi32(p2);
246       if (i - last_x86_pos <= maxTransOffset)
247       {
248         n = (UInt32)((Int32)n - i);
249         SetUi32(p2, n)
250       }
251       target = history + (((UInt32)i + n) & 0xFFFF);
252     }
253 
254     i += (Int32)(codeLen + sizeof(UInt32) - 1);
255 
256     if (i - *target <= k_x86_WindowSize)
257       last_x86_pos = i;
258     *target = i;
259   }
260 
261   data[(size_t)size + kSave] = savedByte;
262 }
263 
264 
265 
266 // static const int kLenIdNeedInit = -2;
267 
CDecoder()268 CDecoder::CDecoder():
269   _x86_history(NULL)
270 {
271 }
272 
~CDecoder()273 CDecoder::~CDecoder()
274 {
275   ::MidFree(_x86_history);
276 }
277 
278 // #define RIF(x) { if (!(x)) return false; }
279 
280 #define LIMIT_CHECK if (_bs._buf < _rc.cur) return S_FALSE;
281 // #define LIMIT_CHECK
282 
283 #define READ_BITS_CHECK(numDirectBits) \
284   if (_bs._buf < _rc.cur) return S_FALSE; \
285   if ((size_t)(_bs._buf - _rc.cur) < (numDirectBits >> 3)) return S_FALSE;
286 
287 
288 #define HUFF_DEC(sym, pp) \
289     sym = pp.DecodeFull(&_bs); \
290     pp.Freqs[sym]++; \
291     if (--pp.RebuildRem == 0) pp.Rebuild();
292 
293 
CodeReal(const Byte * in,size_t inSize,Byte * _win,size_t outSize)294 HRESULT CDecoder::CodeReal(const Byte *in, size_t inSize, Byte *_win, size_t outSize)
295 {
296   // size_t inSizeT = (size_t)(inSize);
297   // Byte *_win;
298   // size_t _pos;
299   _pos = 0;
300 
301   CBitDecoder _bs;
302   CRangeDecoder _rc;
303 
304   if (inSize < 8 || (inSize & 1) != 0)
305     return S_FALSE;
306   _rc.Init(in, inSize);
307   if (_rc.code >= _rc.range)
308     return S_FALSE;
309   _bs.Init(in, inSize);
310 
311   {
312     {
313       {
314         for (unsigned i = 0 ; i < k_NumReps + 1; i++)
315           _reps[i] = i + 1;
316       }
317 
318       {
319         for (unsigned i = 0 ; i < k_NumReps + 1; i++)
320           _deltaReps[i] = i + 1;
321       }
322 
323       mainState = 0;
324       matchState = 0;
325 
326       { for (size_t i = 0; i < k_NumMainProbs; i++) mainProbs[i].Init(); }
327       { for (size_t i = 0; i < k_NumMatchProbs; i++) matchProbs[i].Init(); }
328 
329       {
330         for (size_t k = 0; k < k_NumReps; k++)
331         {
332           lzRepStates[k] = 0;
333           for (size_t i = 0; i < k_NumRepProbs; i++)
334             lzRepProbs[k][i].Init();
335         }
336       }
337       {
338         for (size_t k = 0; k < k_NumReps; k++)
339         {
340           deltaRepStates[k] = 0;
341           for (size_t i = 0; i < k_NumRepProbs; i++)
342             deltaRepProbs[k][i].Init();
343         }
344       }
345 
346       m_LitDecoder.Init();
347       m_LenDecoder.Init();
348       m_PowerDecoder.Init();
349       unsigned numPosSyms = GetNumPosSlots(outSize);
350       if (numPosSyms < 2)
351         numPosSyms = 2;
352       m_PosDecoder.Init(numPosSyms);
353       m_DeltaDecoder.Init(numPosSyms);
354     }
355   }
356 
357   {
358     unsigned prevType = 0;
359 
360     while (_pos < outSize)
361     {
362       if (_rc.Decode(&mainState, k_NumMainProbs, mainProbs) == 0)
363       {
364         unsigned number;
365         HUFF_DEC(number, m_LitDecoder)
366         LIMIT_CHECK
367         _win[_pos++] = (Byte)number;
368         prevType = 0;
369       }
370       else if (_rc.Decode(&matchState, k_NumMatchProbs, matchProbs) == 0)
371       {
372         UInt32 distance;
373 
374         if (_rc.Decode(&lzRepStates[0], k_NumRepProbs, lzRepProbs[0]) == 0)
375         {
376           unsigned number;
377           HUFF_DEC(number, m_PosDecoder)
378           LIMIT_CHECK
379 
380           const unsigned numDirectBits = g_PosDirectBits[number];
381           distance = g_PosBases[number];
382           READ_BITS_CHECK(numDirectBits)
383           distance += _bs.ReadBits32(numDirectBits);
384           // LIMIT_CHECK
385           _reps[3] = _reps[2];
386           _reps[2] = _reps[1];
387           _reps[1] = _reps[0];
388           _reps[0] = distance;
389         }
390         else
391         {
392           if (_rc.Decode(&lzRepStates[1], k_NumRepProbs, lzRepProbs[1]) == 0)
393           {
394             if (prevType != 1)
395               distance = _reps[0];
396             else
397             {
398               distance = _reps[1];
399               _reps[1] = _reps[0];
400               _reps[0] = distance;
401             }
402           }
403           else if (_rc.Decode(&lzRepStates[2], k_NumRepProbs, lzRepProbs[2]) == 0)
404           {
405             if (prevType != 1)
406             {
407               distance = _reps[1];
408               _reps[1] = _reps[0];
409               _reps[0] = distance;
410             }
411             else
412             {
413               distance = _reps[2];
414               _reps[2] = _reps[1];
415               _reps[1] = _reps[0];
416               _reps[0] = distance;
417             }
418           }
419           else
420           {
421             if (prevType != 1)
422             {
423               distance = _reps[2];
424               _reps[2] = _reps[1];
425               _reps[1] = _reps[0];
426               _reps[0] = distance;
427             }
428             else
429             {
430               distance = _reps[3];
431               _reps[3] = _reps[2];
432               _reps[2] = _reps[1];
433               _reps[1] = _reps[0];
434               _reps[0] = distance;
435             }
436           }
437         }
438 
439         unsigned lenSlot;
440         HUFF_DEC(lenSlot, m_LenDecoder)
441         LIMIT_CHECK
442 
443         UInt32 len = g_LenBases[lenSlot];
444         {
445           const unsigned numDirectBits = k_LenDirectBits[lenSlot];
446           READ_BITS_CHECK(numDirectBits)
447           len += _bs.ReadBits32(numDirectBits);
448         }
449         // LIMIT_CHECK
450 
451         if (len > outSize - _pos)
452           return S_FALSE;
453 
454         if (distance > _pos)
455           return S_FALSE;
456 
457         Byte *dest = _win + _pos;
458         const Byte *src = dest - distance;
459         _pos += len;
460         do
461           *dest++ = *src++;
462         while (--len);
463 
464         prevType = 1;
465       }
466       else
467       {
468         UInt64 distance;
469 
470         unsigned power;
471         UInt32 distance32;
472 
473         if (_rc.Decode(&deltaRepStates[0], k_NumRepProbs, deltaRepProbs[0]) == 0)
474         {
475           HUFF_DEC(power, m_PowerDecoder)
476           LIMIT_CHECK
477 
478           unsigned number;
479           HUFF_DEC(number, m_DeltaDecoder)
480           LIMIT_CHECK
481 
482           const unsigned numDirectBits = g_PosDirectBits[number];
483           distance32 = g_PosBases[number];
484           READ_BITS_CHECK(numDirectBits)
485           distance32 += _bs.ReadBits32(numDirectBits);
486           // LIMIT_CHECK
487 
488           distance = ((UInt64)power << 32) | distance32;
489 
490           _deltaReps[3] = _deltaReps[2];
491           _deltaReps[2] = _deltaReps[1];
492           _deltaReps[1] = _deltaReps[0];
493           _deltaReps[0] = distance;
494         }
495         else
496         {
497           if (_rc.Decode(&deltaRepStates[1], k_NumRepProbs, deltaRepProbs[1]) == 0)
498           {
499             if (prevType != 2)
500               distance = _deltaReps[0];
501             else
502             {
503               distance = _deltaReps[1];
504               _deltaReps[1] = _deltaReps[0];
505               _deltaReps[0] = distance;
506             }
507           }
508           else if (_rc.Decode(&deltaRepStates[2], k_NumRepProbs, deltaRepProbs[2]) == 0)
509           {
510             if (prevType != 2)
511             {
512               distance = _deltaReps[1];
513               _deltaReps[1] = _deltaReps[0];
514               _deltaReps[0] = distance;
515             }
516             else
517             {
518               distance = _deltaReps[2];
519               _deltaReps[2] = _deltaReps[1];
520               _deltaReps[1] = _deltaReps[0];
521               _deltaReps[0] = distance;
522             }
523           }
524           else
525           {
526             if (prevType != 2)
527             {
528               distance = _deltaReps[2];
529               _deltaReps[2] = _deltaReps[1];
530               _deltaReps[1] = _deltaReps[0];
531               _deltaReps[0] = distance;
532             }
533             else
534             {
535               distance = _deltaReps[3];
536               _deltaReps[3] = _deltaReps[2];
537               _deltaReps[2] = _deltaReps[1];
538               _deltaReps[1] = _deltaReps[0];
539               _deltaReps[0] = distance;
540             }
541           }
542           distance32 = (UInt32)_deltaReps[0] & 0xFFFFFFFF;
543           power = (UInt32)(_deltaReps[0] >> 32);
544         }
545 
546         const UInt32 dist = (distance32 << power);
547 
548         unsigned lenSlot;
549         HUFF_DEC(lenSlot, m_LenDecoder)
550         LIMIT_CHECK
551 
552         UInt32 len = g_LenBases[lenSlot];
553         {
554           const unsigned numDirectBits = k_LenDirectBits[lenSlot];
555           READ_BITS_CHECK(numDirectBits)
556           len += _bs.ReadBits32(numDirectBits);
557         }
558         // LIMIT_CHECK
559 
560         if (len > outSize - _pos)
561           return S_FALSE;
562 
563         size_t span = (size_t)1 << power;
564         if ((UInt64)dist + span > _pos)
565           return S_FALSE;
566         Byte *dest = _win + _pos - span;
567         const Byte *src = dest - dist;
568         _pos += len;
569         do
570         {
571           *(dest + span) = (Byte)(*(dest) + *(src + span) - *(src));
572           src++;
573           dest++;
574         }
575         while (--len);
576 
577         prevType = 2;
578       }
579     }
580   }
581 
582   _rc.Normalize();
583   if (_rc.code != 0)
584     return S_FALSE;
585   if (_rc.cur > _bs._buf
586       || (_rc.cur == _bs._buf && _bs._bitPos != 0))
587     return S_FALSE;
588 
589   /*
590   int delta = (int)(_bs._buf - _rc.cur);
591   if (_bs._bitPos != 0)
592     delta--;
593   if ((delta & 1))
594     delta--;
595   printf("%d ", delta);
596   */
597 
598   return S_OK;
599 }
600 
Code(const Byte * in,size_t inSize,Byte * out,size_t outSize)601 HRESULT CDecoder::Code(const Byte *in, size_t inSize, Byte *out, size_t outSize)
602 {
603   if (!_x86_history)
604   {
605     _x86_history = (Int32 *)::MidAlloc(sizeof(Int32) * k_x86_HistorySize);
606     if (!_x86_history)
607       return E_OUTOFMEMORY;
608   }
609   HRESULT res;
610   // try
611   {
612     res = CodeReal(in, inSize, out, outSize);
613   }
614   // catch (...) { res = S_FALSE; }
615   x86_Filter(out, (UInt32)_pos, _x86_history);
616   return res;
617 }
618 
619 }}
620