xref: /aosp_15_r20/external/lzma/C/Ppmd7.c (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 /* Ppmd7.c -- PPMdH codec
2 2023-09-07 : Igor Pavlov : Public domain
3 This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
4 
5 #include "Precomp.h"
6 
7 #include <string.h>
8 
9 #include "Ppmd7.h"
10 
11 /* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. code */
12 // #define PPMD7_ORDER_0_SUPPPORT
13 
14 MY_ALIGN(16)
15 static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
16 MY_ALIGN(16)
17 static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
18 
19 #define MAX_FREQ 124
20 #define UNIT_SIZE 12
21 
22 #define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
23 #define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
24 #define I2U(indx) ((unsigned)p->Indx2Units[indx])
25 #define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
26 
27 #define REF(ptr) Ppmd_GetRef(p, ptr)
28 
29 #define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
30 
31 #define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
32 #define STATS(ctx) Ppmd7_GetStats(p, ctx)
33 #define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
34 #define SUFFIX(ctx) CTX((ctx)->Suffix)
35 
36 typedef CPpmd7_Context * PPMD7_CTX_PTR;
37 
38 struct CPpmd7_Node_;
39 
40 typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
41 
42 typedef struct CPpmd7_Node_
43 {
44   UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
45   UInt16 NU;
46   CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
47   CPpmd7_Node_Ref Prev;
48 } CPpmd7_Node;
49 
50 #define NODE(r)  Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
51 
Ppmd7_Construct(CPpmd7 * p)52 void Ppmd7_Construct(CPpmd7 *p)
53 {
54   unsigned i, k, m;
55 
56   p->Base = NULL;
57 
58   for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
59   {
60     unsigned step = (i >= 12 ? 4 : (i >> 2) + 1);
61     do { p->Units2Indx[k++] = (Byte)i; } while (--step);
62     p->Indx2Units[i] = (Byte)k;
63   }
64 
65   p->NS2BSIndx[0] = (0 << 1);
66   p->NS2BSIndx[1] = (1 << 1);
67   memset(p->NS2BSIndx + 2, (2 << 1), 9);
68   memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11);
69 
70   for (i = 0; i < 3; i++)
71     p->NS2Indx[i] = (Byte)i;
72 
73   for (m = i, k = 1; i < 256; i++)
74   {
75     p->NS2Indx[i] = (Byte)m;
76     if (--k == 0)
77       k = (++m) - 2;
78   }
79 
80   memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
81 }
82 
83 
Ppmd7_Free(CPpmd7 * p,ISzAllocPtr alloc)84 void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
85 {
86   ISzAlloc_Free(alloc, p->Base);
87   p->Size = 0;
88   p->Base = NULL;
89 }
90 
91 
Ppmd7_Alloc(CPpmd7 * p,UInt32 size,ISzAllocPtr alloc)92 BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
93 {
94   if (!p->Base || p->Size != size)
95   {
96     Ppmd7_Free(p, alloc);
97     p->AlignOffset = (4 - size) & 3;
98     if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
99       return False;
100     p->Size = size;
101   }
102   return True;
103 }
104 
105 
106 
107 // ---------- Internal Memory Allocator ----------
108 
109 /* We can use CPpmd7_Node in list of free units (as in Ppmd8)
110    But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks().
111    So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode()
112 */
113 
114 #define EMPTY_NODE 0
115 
116 
Ppmd7_InsertNode(CPpmd7 * p,void * node,unsigned indx)117 static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx)
118 {
119   *((CPpmd_Void_Ref *)node) = p->FreeList[indx];
120   // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
121 
122   p->FreeList[indx] = REF(node);
123 
124 }
125 
126 
Ppmd7_RemoveNode(CPpmd7 * p,unsigned indx)127 static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx)
128 {
129   CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
130   p->FreeList[indx] = *node;
131   // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]);
132   // p->FreeList[indx] = node->Next;
133   return node;
134 }
135 
136 
Ppmd7_SplitBlock(CPpmd7 * p,void * ptr,unsigned oldIndx,unsigned newIndx)137 static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
138 {
139   unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
140   ptr = (Byte *)ptr + U2B(I2U(newIndx));
141   if (I2U(i = U2I(nu)) != nu)
142   {
143     unsigned k = I2U(--i);
144     Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
145   }
146   Ppmd7_InsertNode(p, ptr, i);
147 }
148 
149 
150 /* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
151 
152 typedef union
153 {
154   CPpmd7_Node     Node;
155   CPpmd7_Node_Ref NextRef;
156 } CPpmd7_Node_Union;
157 
158 /* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks()
159    we use single linked list similar to Ppmd8 code */
160 
161 
Ppmd7_GlueFreeBlocks(CPpmd7 * p)162 static void Ppmd7_GlueFreeBlocks(CPpmd7 *p)
163 {
164   /*
165   we use first UInt16 field of 12-bytes UNITs as record type stamp
166     CPpmd_State    { Byte Symbol; Byte Freq; : Freq != 0
167     CPpmd7_Context { UInt16 NumStats;        : NumStats != 0
168     CPpmd7_Node    { UInt16 Stamp            : Stamp == 0 for free record
169                                              : Stamp == 1 for head record and guard
170     Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record.
171   */
172   CPpmd7_Node_Ref head, n = 0;
173 
174   p->GlueCount = 255;
175 
176 
177   /* we set guard NODE at LoUnit */
178   if (p->LoUnit != p->HiUnit)
179     ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
180 
181   {
182     /* Create list of free blocks.
183        We still need one additional list walk pass before Glue. */
184     unsigned i;
185     for (i = 0; i < PPMD_NUM_INDEXES; i++)
186     {
187       const UInt16 nu = I2U_UInt16(i);
188       CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
189       p->FreeList[i] = 0;
190       while (next != 0)
191       {
192         /* Don't change the order of the following commands: */
193         CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
194         const CPpmd7_Node_Ref tmp = next;
195         next = un->NextRef;
196         un->Node.Stamp = EMPTY_NODE;
197         un->Node.NU = nu;
198         un->Node.Next = n;
199         n = tmp;
200       }
201     }
202   }
203 
204   head = n;
205   /* Glue and Fill must walk the list in same direction */
206   {
207     /* Glue free blocks */
208     CPpmd7_Node_Ref *prev = &head;
209     while (n)
210     {
211       CPpmd7_Node *node = NODE(n);
212       UInt32 nu = node->NU;
213       n = node->Next;
214       if (nu == 0)
215       {
216         *prev = n;
217         continue;
218       }
219       prev = &node->Next;
220       for (;;)
221       {
222         CPpmd7_Node *node2 = node + nu;
223         nu += node2->NU;
224         if (node2->Stamp != EMPTY_NODE || nu >= 0x10000)
225           break;
226         node->NU = (UInt16)nu;
227         node2->NU = 0;
228       }
229     }
230   }
231 
232   /* Fill lists of free blocks */
233   for (n = head; n != 0;)
234   {
235     CPpmd7_Node *node = NODE(n);
236     UInt32 nu = node->NU;
237     unsigned i;
238     n = node->Next;
239     if (nu == 0)
240       continue;
241     for (; nu > 128; nu -= 128, node += 128)
242       Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1);
243     if (I2U(i = U2I(nu)) != nu)
244     {
245       unsigned k = I2U(--i);
246       Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1);
247     }
248     Ppmd7_InsertNode(p, node, i);
249   }
250 }
251 
252 
253 Z7_NO_INLINE
Ppmd7_AllocUnitsRare(CPpmd7 * p,unsigned indx)254 static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx)
255 {
256   unsigned i;
257 
258   if (p->GlueCount == 0)
259   {
260     Ppmd7_GlueFreeBlocks(p);
261     if (p->FreeList[indx] != 0)
262       return Ppmd7_RemoveNode(p, indx);
263   }
264 
265   i = indx;
266 
267   do
268   {
269     if (++i == PPMD_NUM_INDEXES)
270     {
271       UInt32 numBytes = U2B(I2U(indx));
272       Byte *us = p->UnitsStart;
273       p->GlueCount--;
274       return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL;
275     }
276   }
277   while (p->FreeList[i] == 0);
278 
279   {
280     void *block = Ppmd7_RemoveNode(p, i);
281     Ppmd7_SplitBlock(p, block, i, indx);
282     return block;
283   }
284 }
285 
286 
Ppmd7_AllocUnits(CPpmd7 * p,unsigned indx)287 static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx)
288 {
289   if (p->FreeList[indx] != 0)
290     return Ppmd7_RemoveNode(p, indx);
291   {
292     UInt32 numBytes = U2B(I2U(indx));
293     Byte *lo = p->LoUnit;
294     if ((UInt32)(p->HiUnit - lo) >= numBytes)
295     {
296       p->LoUnit = lo + numBytes;
297       return lo;
298     }
299   }
300   return Ppmd7_AllocUnitsRare(p, indx);
301 }
302 
303 
304 #define MEM_12_CPY(dest, src, num) \
305   { UInt32 *d = (UInt32 *)(dest); \
306     const UInt32 *z = (const UInt32 *)(src); \
307     unsigned n = (num); \
308     do { \
309       d[0] = z[0]; \
310       d[1] = z[1]; \
311       d[2] = z[2]; \
312       z += 3; \
313       d += 3; \
314     } while (--n); \
315   }
316 
317 
318 /*
319 static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
320 {
321   unsigned i0 = U2I(oldNU);
322   unsigned i1 = U2I(newNU);
323   if (i0 == i1)
324     return oldPtr;
325   if (p->FreeList[i1] != 0)
326   {
327     void *ptr = Ppmd7_RemoveNode(p, i1);
328     MEM_12_CPY(ptr, oldPtr, newNU)
329     Ppmd7_InsertNode(p, oldPtr, i0);
330     return ptr;
331   }
332   Ppmd7_SplitBlock(p, oldPtr, i0, i1);
333   return oldPtr;
334 }
335 */
336 
337 
338 #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
SetSuccessor(CPpmd_State * p,CPpmd_Void_Ref v)339 static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
340 {
341   Ppmd_SET_SUCCESSOR(p, v)
342 }
343 
344 
345 
346 Z7_NO_INLINE
347 static
Ppmd7_RestartModel(CPpmd7 * p)348 void Ppmd7_RestartModel(CPpmd7 *p)
349 {
350   unsigned i, k;
351 
352   memset(p->FreeList, 0, sizeof(p->FreeList));
353 
354   p->Text = p->Base + p->AlignOffset;
355   p->HiUnit = p->Text + p->Size;
356   p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
357   p->GlueCount = 0;
358 
359   p->OrderFall = p->MaxOrder;
360   p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
361   p->PrevSuccess = 0;
362 
363   {
364     CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
365     CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */
366 
367     p->LoUnit += U2B(256 / 2);
368     p->MaxContext = p->MinContext = mc;
369     p->FoundState = s;
370 
371     mc->NumStats = 256;
372     mc->Union2.SummFreq = 256 + 1;
373     mc->Union4.Stats = REF(s);
374     mc->Suffix = 0;
375 
376     for (i = 0; i < 256; i++, s++)
377     {
378       s->Symbol = (Byte)i;
379       s->Freq = 1;
380       SetSuccessor(s, 0);
381     }
382 
383     #ifdef PPMD7_ORDER_0_SUPPPORT
384     if (p->MaxOrder == 0)
385     {
386       CPpmd_Void_Ref r = REF(mc);
387       s = p->FoundState;
388       for (i = 0; i < 256; i++, s++)
389         SetSuccessor(s, r);
390       return;
391     }
392     #endif
393   }
394 
395   for (i = 0; i < 128; i++)
396 
397 
398 
399     for (k = 0; k < 8; k++)
400     {
401       unsigned m;
402       UInt16 *dest = p->BinSumm[i] + k;
403       const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2));
404       for (m = 0; m < 64; m += 8)
405         dest[m] = val;
406     }
407 
408 
409   for (i = 0; i < 25; i++)
410   {
411 
412     CPpmd_See *s = p->See[i];
413 
414 
415 
416     unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
417     for (k = 0; k < 16; k++, s++)
418     {
419       s->Summ = (UInt16)summ;
420       s->Shift = (PPMD_PERIOD_BITS - 4);
421       s->Count = 4;
422     }
423   }
424 
425   p->DummySee.Summ = 0; /* unused */
426   p->DummySee.Shift = PPMD_PERIOD_BITS;
427   p->DummySee.Count = 64; /* unused */
428 }
429 
430 
Ppmd7_Init(CPpmd7 * p,unsigned maxOrder)431 void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
432 {
433   p->MaxOrder = maxOrder;
434 
435   Ppmd7_RestartModel(p);
436 }
437 
438 
439 
440 /*
441   Ppmd7_CreateSuccessors()
442   It's called when (FoundState->Successor) is RAW-Successor,
443   that is the link to position in Raw text.
444   So we create Context records and write the links to
445   FoundState->Successor and to identical RAW-Successors in suffix
446   contexts of MinContex.
447 
448   The function returns:
449   if (OrderFall == 0) then MinContext is already at MAX order,
450     { return pointer to new or existing context of same MAX order }
451   else
452     { return pointer to new real context that will be (Order+1) in comparison with MinContext
453 
454   also it can return pointer to real context of same order,
455 */
456 
457 Z7_NO_INLINE
Ppmd7_CreateSuccessors(CPpmd7 * p)458 static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p)
459 {
460   PPMD7_CTX_PTR c = p->MinContext;
461   CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
462   Byte newSym, newFreq;
463   unsigned numPs = 0;
464   CPpmd_State *ps[PPMD7_MAX_ORDER];
465 
466   if (p->OrderFall != 0)
467     ps[numPs++] = p->FoundState;
468 
469   while (c->Suffix)
470   {
471     CPpmd_Void_Ref successor;
472     CPpmd_State *s;
473     c = SUFFIX(c);
474 
475 
476     if (c->NumStats != 1)
477     {
478       Byte sym = p->FoundState->Symbol;
479       for (s = STATS(c); s->Symbol != sym; s++);
480 
481     }
482     else
483     {
484       s = ONE_STATE(c);
485 
486     }
487     successor = SUCCESSOR(s);
488     if (successor != upBranch)
489     {
490       // (c) is real record Context here,
491       c = CTX(successor);
492       if (numPs == 0)
493       {
494         // (c) is real record MAX Order Context here,
495         // So we don't need to create any new contexts.
496         return c;
497       }
498       break;
499     }
500     ps[numPs++] = s;
501   }
502 
503   // All created contexts will have single-symbol with new RAW-Successor
504   // All new RAW-Successors will point to next position in RAW text
505   // after FoundState->Successor
506 
507   newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
508   upBranch++;
509 
510 
511   if (c->NumStats == 1)
512     newFreq = ONE_STATE(c)->Freq;
513   else
514   {
515     UInt32 cf, s0;
516     CPpmd_State *s;
517     for (s = STATS(c); s->Symbol != newSym; s++);
518     cf = (UInt32)s->Freq - 1;
519     s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
520     /*
521       cf - is frequency of symbol that will be Successor in new context records.
522       s0 - is commulative frequency sum of another symbols from parent context.
523       max(newFreq)= (s->Freq + 1), when (s0 == 1)
524       we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[]
525       so (s->Freq < 128) - is requirement for multi-symbol contexts
526     */
527     newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1));
528   }
529 
530   // Create new single-symbol contexts from low order to high order in loop
531 
532   do
533   {
534     PPMD7_CTX_PTR c1;
535     /* = AllocContext(p); */
536     if (p->HiUnit != p->LoUnit)
537       c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
538     else if (p->FreeList[0] != 0)
539       c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0);
540     else
541     {
542       c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0);
543       if (!c1)
544         return NULL;
545     }
546 
547     c1->NumStats = 1;
548     ONE_STATE(c1)->Symbol = newSym;
549     ONE_STATE(c1)->Freq = newFreq;
550     SetSuccessor(ONE_STATE(c1), upBranch);
551     c1->Suffix = REF(c);
552     SetSuccessor(ps[--numPs], REF(c1));
553     c = c1;
554   }
555   while (numPs != 0);
556 
557   return c;
558 }
559 
560 
561 
562 #define SWAP_STATES(s) \
563   { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
564 
565 
566 void Ppmd7_UpdateModel(CPpmd7 *p);
567 Z7_NO_INLINE
Ppmd7_UpdateModel(CPpmd7 * p)568 void Ppmd7_UpdateModel(CPpmd7 *p)
569 {
570   CPpmd_Void_Ref maxSuccessor, minSuccessor;
571   PPMD7_CTX_PTR c, mc;
572   unsigned s0, ns;
573 
574 
575 
576   if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
577   {
578     /* Update Freqs in Suffix Context */
579 
580     c = SUFFIX(p->MinContext);
581 
582     if (c->NumStats == 1)
583     {
584       CPpmd_State *s = ONE_STATE(c);
585       if (s->Freq < 32)
586         s->Freq++;
587     }
588     else
589     {
590       CPpmd_State *s = STATS(c);
591       Byte sym = p->FoundState->Symbol;
592 
593       if (s->Symbol != sym)
594       {
595         do
596         {
597           // s++; if (s->Symbol == sym) break;
598           s++;
599         }
600         while (s->Symbol != sym);
601 
602         if (s[0].Freq >= s[-1].Freq)
603         {
604           SWAP_STATES(s)
605           s--;
606         }
607       }
608 
609       if (s->Freq < MAX_FREQ - 9)
610       {
611         s->Freq = (Byte)(s->Freq + 2);
612         c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
613       }
614     }
615   }
616 
617 
618   if (p->OrderFall == 0)
619   {
620     /* MAX ORDER context */
621     /* (FoundState->Successor) is RAW-Successor. */
622     p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p);
623     if (!p->MinContext)
624     {
625       Ppmd7_RestartModel(p);
626       return;
627     }
628     SetSuccessor(p->FoundState, REF(p->MinContext));
629     return;
630   }
631 
632 
633   /* NON-MAX ORDER context */
634 
635   {
636     Byte *text = p->Text;
637     *text++ = p->FoundState->Symbol;
638     p->Text = text;
639     if (text >= p->UnitsStart)
640     {
641       Ppmd7_RestartModel(p);
642       return;
643     }
644     maxSuccessor = REF(text);
645   }
646 
647   minSuccessor = SUCCESSOR(p->FoundState);
648 
649   if (minSuccessor)
650   {
651     // there is Successor for FoundState in MinContext.
652     // So the next context will be one order higher than MinContext.
653 
654     if (minSuccessor <= maxSuccessor)
655     {
656       // minSuccessor is RAW-Successor. So we will create real contexts records:
657       PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p);
658       if (!cs)
659       {
660         Ppmd7_RestartModel(p);
661         return;
662       }
663       minSuccessor = REF(cs);
664     }
665 
666     // minSuccessor now is real Context pointer that points to existing (Order+1) context
667 
668     if (--p->OrderFall == 0)
669     {
670       /*
671       if we move to MaxOrder context, then minSuccessor will be common Succesor for both:
672         MinContext that is (MaxOrder - 1)
673         MaxContext that is (MaxOrder)
674       so we don't need new RAW-Successor, and we can use real minSuccessor
675       as succssors for both MinContext and MaxContext.
676       */
677       maxSuccessor = minSuccessor;
678 
679       /*
680       if (MaxContext != MinContext)
681       {
682         there was order fall from MaxOrder and we don't need current symbol
683         to transfer some RAW-Succesors to real contexts.
684         So we roll back pointer in raw data for one position.
685       }
686       */
687       p->Text -= (p->MaxContext != p->MinContext);
688     }
689   }
690   else
691   {
692     /*
693     FoundState has NULL-Successor here.
694     And only root 0-order context can contain NULL-Successors.
695     We change Successor in FoundState to RAW-Successor,
696     And next context will be same 0-order root Context.
697     */
698     SetSuccessor(p->FoundState, maxSuccessor);
699     minSuccessor = REF(p->MinContext);
700   }
701 
702   mc = p->MinContext;
703   c = p->MaxContext;
704 
705   p->MaxContext = p->MinContext = CTX(minSuccessor);
706 
707   if (c == mc)
708     return;
709 
710   // s0 : is pure Escape Freq
711   s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1);
712 
713   do
714   {
715     unsigned ns1;
716     UInt32 sum;
717 
718     if ((ns1 = c->NumStats) != 1)
719     {
720       if ((ns1 & 1) == 0)
721       {
722         /* Expand for one UNIT */
723         const unsigned oldNU = ns1 >> 1;
724         const unsigned i = U2I(oldNU);
725         if (i != U2I((size_t)oldNU + 1))
726         {
727           void *ptr = Ppmd7_AllocUnits(p, i + 1);
728           void *oldPtr;
729           if (!ptr)
730           {
731             Ppmd7_RestartModel(p);
732             return;
733           }
734           oldPtr = STATS(c);
735           MEM_12_CPY(ptr, oldPtr, oldNU)
736           Ppmd7_InsertNode(p, oldPtr, i);
737           c->Union4.Stats = STATS_REF(ptr);
738         }
739       }
740       sum = c->Union2.SummFreq;
741       /* max increase of Escape_Freq is 3 here.
742          total increase of Union2.SummFreq for all symbols is less than 256 here */
743       sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)));
744       /* original PPMdH uses 16-bit variable for (sum) here.
745          But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
746       // sum = (UInt16)sum;
747     }
748     else
749     {
750       // instead of One-symbol context we create 2-symbol context
751       CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0);
752       if (!s)
753       {
754         Ppmd7_RestartModel(p);
755         return;
756       }
757       {
758         unsigned freq = c->Union2.State2.Freq;
759         // s = *ONE_STATE(c);
760         s->Symbol = c->Union2.State2.Symbol;
761         s->Successor_0 = c->Union4.State4.Successor_0;
762         s->Successor_1 = c->Union4.State4.Successor_1;
763         // SetSuccessor(s, c->Union4.Stats);  // call it only for debug purposes to check the order of
764                                               // (Successor_0 and Successor_1) in LE/BE.
765         c->Union4.Stats = REF(s);
766         if (freq < MAX_FREQ / 4 - 1)
767           freq <<= 1;
768         else
769           freq = MAX_FREQ - 4;
770         // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
771         s->Freq = (Byte)freq;
772         // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
773         sum = (UInt32)(freq + p->InitEsc + (ns > 3));
774       }
775     }
776 
777     {
778       CPpmd_State *s = STATS(c) + ns1;
779       UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq;
780       UInt32 sf = (UInt32)s0 + sum;
781       s->Symbol = p->FoundState->Symbol;
782       c->NumStats = (UInt16)(ns1 + 1);
783       SetSuccessor(s, maxSuccessor);
784 
785       if (cf < 6 * sf)
786       {
787         cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf);
788         sum += 3;
789         /* It can add (0, 1, 2) to Escape_Freq */
790       }
791       else
792       {
793         cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
794         sum += cf;
795       }
796 
797       c->Union2.SummFreq = (UInt16)sum;
798       s->Freq = (Byte)cf;
799     }
800     c = SUFFIX(c);
801   }
802   while (c != mc);
803 }
804 
805 
806 
807 Z7_NO_INLINE
Ppmd7_Rescale(CPpmd7 * p)808 static void Ppmd7_Rescale(CPpmd7 *p)
809 {
810   unsigned i, adder, sumFreq, escFreq;
811   CPpmd_State *stats = STATS(p->MinContext);
812   CPpmd_State *s = p->FoundState;
813 
814   /* Sort the list by Freq */
815   if (s != stats)
816   {
817     CPpmd_State tmp = *s;
818     do
819       s[0] = s[-1];
820     while (--s != stats);
821     *s = tmp;
822   }
823 
824   sumFreq = s->Freq;
825   escFreq = p->MinContext->Union2.SummFreq - sumFreq;
826 
827   /*
828   if (p->OrderFall == 0), adder = 0 : it's     allowed to remove symbol from     MAX Order context
829   if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context
830   */
831 
832   adder = (p->OrderFall != 0);
833 
834   #ifdef PPMD7_ORDER_0_SUPPPORT
835   adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context
836   #endif
837 
838   sumFreq = (sumFreq + 4 + adder) >> 1;
839   i = (unsigned)p->MinContext->NumStats - 1;
840   s->Freq = (Byte)sumFreq;
841 
842   do
843   {
844     unsigned freq = (++s)->Freq;
845     escFreq -= freq;
846     freq = (freq + adder) >> 1;
847     sumFreq += freq;
848     s->Freq = (Byte)freq;
849     if (freq > s[-1].Freq)
850     {
851       CPpmd_State tmp = *s;
852       CPpmd_State *s1 = s;
853       do
854       {
855         s1[0] = s1[-1];
856       }
857       while (--s1 != stats && freq > s1[-1].Freq);
858       *s1 = tmp;
859     }
860   }
861   while (--i);
862 
863   if (s->Freq == 0)
864   {
865     /* Remove all items with Freq == 0 */
866     CPpmd7_Context *mc;
867     unsigned numStats, numStatsNew, n0, n1;
868 
869     i = 0; do { i++; } while ((--s)->Freq == 0);
870 
871     /* We increase (escFreq) for the number of removed symbols.
872        So we will have (0.5) increase for Escape_Freq in avarage per
873        removed symbol after Escape_Freq halving */
874     escFreq += i;
875     mc = p->MinContext;
876     numStats = mc->NumStats;
877     numStatsNew = numStats - i;
878     mc->NumStats = (UInt16)(numStatsNew);
879     n0 = (numStats + 1) >> 1;
880 
881     if (numStatsNew == 1)
882     {
883       /* Create Single-Symbol context */
884       unsigned freq = stats->Freq;
885 
886       do
887       {
888         escFreq >>= 1;
889         freq = (freq + 1) >> 1;
890       }
891       while (escFreq > 1);
892 
893       s = ONE_STATE(mc);
894       *s = *stats;
895       s->Freq = (Byte)freq; // (freq <= 260 / 4)
896       p->FoundState = s;
897       Ppmd7_InsertNode(p, stats, U2I(n0));
898       return;
899     }
900 
901     n1 = (numStatsNew + 1) >> 1;
902     if (n0 != n1)
903     {
904       // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
905       unsigned i0 = U2I(n0);
906       unsigned i1 = U2I(n1);
907       if (i0 != i1)
908       {
909         if (p->FreeList[i1] != 0)
910         {
911           void *ptr = Ppmd7_RemoveNode(p, i1);
912           p->MinContext->Union4.Stats = STATS_REF(ptr);
913           MEM_12_CPY(ptr, (const void *)stats, n1)
914           Ppmd7_InsertNode(p, stats, i0);
915         }
916         else
917           Ppmd7_SplitBlock(p, stats, i0, i1);
918       }
919     }
920   }
921   {
922     CPpmd7_Context *mc = p->MinContext;
923     mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
924     // Escape_Freq halving here
925     p->FoundState = STATS(mc);
926   }
927 }
928 
929 
Ppmd7_MakeEscFreq(CPpmd7 * p,unsigned numMasked,UInt32 * escFreq)930 CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
931 {
932   CPpmd_See *see;
933   const CPpmd7_Context *mc = p->MinContext;
934   unsigned numStats = mc->NumStats;
935   if (numStats != 256)
936   {
937     unsigned nonMasked = numStats - numMasked;
938     see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
939         + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats)
940         + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats)
941         + 4 * (unsigned)(numMasked > nonMasked) +
942         p->HiBitsFlag;
943     {
944       // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
945       const unsigned summ = (UInt16)see->Summ; // & 0xFFFF
946       const unsigned r = (summ >> see->Shift);
947       see->Summ = (UInt16)(summ - r);
948       *escFreq = (UInt32)(r + (r == 0));
949     }
950   }
951   else
952   {
953     see = &p->DummySee;
954     *escFreq = 1;
955   }
956   return see;
957 }
958 
959 
Ppmd7_NextContext(CPpmd7 * p)960 static void Ppmd7_NextContext(CPpmd7 *p)
961 {
962   PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
963   if (p->OrderFall == 0 && (const Byte *)c > p->Text)
964     p->MaxContext = p->MinContext = c;
965   else
966     Ppmd7_UpdateModel(p);
967 }
968 
969 
Ppmd7_Update1(CPpmd7 * p)970 void Ppmd7_Update1(CPpmd7 *p)
971 {
972   CPpmd_State *s = p->FoundState;
973   unsigned freq = s->Freq;
974   freq += 4;
975   p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
976   s->Freq = (Byte)freq;
977   if (freq > s[-1].Freq)
978   {
979     SWAP_STATES(s)
980     p->FoundState = --s;
981     if (freq > MAX_FREQ)
982       Ppmd7_Rescale(p);
983   }
984   Ppmd7_NextContext(p);
985 }
986 
987 
Ppmd7_Update1_0(CPpmd7 * p)988 void Ppmd7_Update1_0(CPpmd7 *p)
989 {
990   CPpmd_State *s = p->FoundState;
991   CPpmd7_Context *mc = p->MinContext;
992   unsigned freq = s->Freq;
993   const unsigned summFreq = mc->Union2.SummFreq;
994   p->PrevSuccess = (2 * freq > summFreq);
995   p->RunLength += (Int32)p->PrevSuccess;
996   mc->Union2.SummFreq = (UInt16)(summFreq + 4);
997   freq += 4;
998   s->Freq = (Byte)freq;
999   if (freq > MAX_FREQ)
1000     Ppmd7_Rescale(p);
1001   Ppmd7_NextContext(p);
1002 }
1003 
1004 
1005 /*
1006 void Ppmd7_UpdateBin(CPpmd7 *p)
1007 {
1008   unsigned freq = p->FoundState->Freq;
1009   p->FoundState->Freq = (Byte)(freq + (freq < 128));
1010   p->PrevSuccess = 1;
1011   p->RunLength++;
1012   Ppmd7_NextContext(p);
1013 }
1014 */
1015 
Ppmd7_Update2(CPpmd7 * p)1016 void Ppmd7_Update2(CPpmd7 *p)
1017 {
1018   CPpmd_State *s = p->FoundState;
1019   unsigned freq = s->Freq;
1020   freq += 4;
1021   p->RunLength = p->InitRL;
1022   p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
1023   s->Freq = (Byte)freq;
1024   if (freq > MAX_FREQ)
1025     Ppmd7_Rescale(p);
1026   Ppmd7_UpdateModel(p);
1027 }
1028 
1029 
1030 
1031 /*
1032 PPMd Memory Map:
1033 {
1034   [ 0 ]           contains subset of original raw text, that is required to create context
1035                   records, Some symbols are not written, when max order context was reached
1036   [ Text ]        free area
1037   [ UnitsStart ]  CPpmd_State vectors and CPpmd7_Context records
1038   [ LoUnit ]      free  area for CPpmd_State and CPpmd7_Context items
1039 [ HiUnit ]      CPpmd7_Context records
1040   [ Size ]        end of array
1041 }
1042 
1043 These addresses don't cross at any time.
1044 And the following condtions is true for addresses:
1045   (0  <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
1046 
1047 Raw text is BYTE--aligned.
1048 the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs.
1049 
1050 Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
1051 The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
1052 The code doesn't free UNITs allocated for CPpmd7_Context records.
1053 
1054 The code calls Ppmd7_RestartModel(), when there is no free memory for allocation.
1055 And Ppmd7_RestartModel() changes the state to orignal start state, with full free block.
1056 
1057 
1058 The code allocates UNITs with the following order:
1059 
1060 Allocation of 1 UNIT for Context record
1061   - from free space (HiUnit) down to (LoUnit)
1062   - from FreeList[0]
1063   - Ppmd7_AllocUnitsRare()
1064 
1065 Ppmd7_AllocUnits() for CPpmd_State vectors:
1066   - from FreeList[i]
1067   - from free space (LoUnit) up to (HiUnit)
1068   - Ppmd7_AllocUnitsRare()
1069 
1070 Ppmd7_AllocUnitsRare()
1071   - if (GlueCount == 0)
1072        {  Glue lists, GlueCount = 255, allocate from FreeList[i]] }
1073   - loop for all higher sized FreeList[...] lists
1074   - from (UnitsStart - Text), GlueCount--
1075   - ERROR
1076 
1077 
1078 Each Record with Context contains the CPpmd_State vector, where each
1079 CPpmd_State contains the link to Successor.
1080 There are 3 types of Successor:
1081   1) NULL-Successor   - NULL pointer. NULL-Successor links can be stored
1082                         only in 0-order Root Context Record.
1083                         We use 0 value as NULL-Successor
1084   2) RAW-Successor    - the link to position in raw text,
1085                         that "RAW-Successor" is being created after first
1086                         occurrence of new symbol for some existing context record.
1087                         (RAW-Successor > 0).
1088   3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
1089                         that record is being created when we go via RAW-Successor again.
1090 
1091 For any successors at any time: the following condtions are true for Successor links:
1092 (NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
1093 
1094 
1095 ---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
1096 
1097 CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
1098 
1099 The PPMd code tries to fulfill the condition:
1100   (SummFreq <= (256 * 128 = RC::kBot))
1101 
1102 We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
1103 So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
1104 If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
1105 SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions.
1106 Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for
1107 max-order context.
1108 
1109 When the PPMd code still break (Total <= RC::Range) condition in range coder,
1110 we have two ways to resolve that problem:
1111   1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
1112   2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
1113 */
1114 
1115 #undef MAX_FREQ
1116 #undef UNIT_SIZE
1117 #undef U2B
1118 #undef U2I
1119 #undef I2U
1120 #undef I2U_UInt16
1121 #undef REF
1122 #undef STATS_REF
1123 #undef CTX
1124 #undef STATS
1125 #undef ONE_STATE
1126 #undef SUFFIX
1127 #undef NODE
1128 #undef EMPTY_NODE
1129 #undef MEM_12_CPY
1130 #undef SUCCESSOR
1131 #undef SWAP_STATES
1132