/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2024-01-22 : Igor Pavlov : Public domain */

#include "Precomp.h"

// #include <stdio.h>

#include "CpuArch.h"

#include "LzHash.h"
#include "LzFindMt.h"

// #define LOG_ITERS

// #define LOG_THREAD

#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif

#ifdef LOG_ITERS
#include <stdio.h>
extern UInt64 g_NumIters_Tree;
extern UInt64 g_NumIters_Loop;
extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif

#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)

#define GET_HASH_BLOCK_OFFSET(i)  (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)

#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)

#define GET_BT_BLOCK_OFFSET(i)  (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
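
/* The hash and BT buffers are split into (kMt*NumBlocks) fixed-size blocks
   that are reused as a ring: GET_*_BLOCK_OFFSET() masks the running block
   index with (numBlocks - 1), so block (i) and block (i + numBlocks) share
   the same storage. The free/filled semaphore pair in CMtSync ensures that
   a block is not overwritten before the reader has released it. */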

/*
  HASH functions:
  We use raw 8/16 bits from a[1] and a[2],
  xored with crc(a[0]) and crc(a[3]).
  We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
  our crc() function provides one-to-one correspondence for low 8-bit values:
    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/

#define MF(mt) ((mt)->MatchFinder)
#define MF_CRC (p->crc)

// #define MF(mt) (&(mt)->MatchFinder)
// #define MF_CRC (p->MatchFinder.crc)

#define MT_HASH2_CALC \
  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);

#define MT_HASH3_CALC { \
  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

/*
#define MT_HASH3_CALC__NO_2 { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

#define MT_HASH4_CALC { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  temp ^= ((UInt32)cur[2] << 8); \
  h3 = temp & (kHash3Size - 1); \
  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
  // (kHash4Size - 1);
*/


Z7_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
  p->affinity = 0;
  p->wasCreated = False;
  p->csWasInitialized = False;
  p->csWasEntered = False;
  Thread_CONSTRUCT(&p->thread)
  Event_Construct(&p->canStart);
  Event_Construct(&p->wasStopped);
  Semaphore_Construct(&p->freeSemaphore);
  Semaphore_Construct(&p->filledSemaphore);
}


// #define DEBUG_BUFFER_LOCK   // define it to debug lock state

#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
#define BUFFER_MUST_BE_LOCKED(p)    if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p)  if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif

#define LOCK_BUFFER(p) { \
    BUFFER_MUST_BE_UNLOCKED(p); \
    CriticalSection_Enter(&(p)->cs); \
    (p)->csWasEntered = True; }

#define UNLOCK_BUFFER(p) { \
    BUFFER_MUST_BE_LOCKED(p); \
    CriticalSection_Leave(&(p)->cs); \
    (p)->csWasEntered = False; }

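/* Consumer side of the block queue:
   on the first call after (re)start we signal (canStart) so the writer
   thread begins filling blocks; on later calls we first return the block
   we have finished reading to the writer (freeSemaphore), then wait for
   the next filled block (filledSemaphore) and lock the buffer again. */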
Z7_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
  UInt32 numBlocks = 0;
  if (p->needStart)
  {
    BUFFER_MUST_BE_UNLOCKED(p)
    p->numProcessedBlocks = 1;
    p->needStart = False;
    p->stopWriting = False;
    p->exit = False;
    Event_Reset(&p->wasStopped);
    Event_Set(&p->canStart);
  }
  else
  {
    UNLOCK_BUFFER(p)
    // we free current block
    numBlocks = p->numProcessedBlocks++;
    Semaphore_Release1(&p->freeSemaphore);
  }

  // buffer is UNLOCKED here
  Semaphore_Wait(&p->filledSemaphore);
  LOCK_BUFFER(p)
  return numBlocks;
}


/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */

Z7_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
  if (!Thread_WasCreated(&p->thread) || p->needStart)
    return;

    PRF(printf("\nMtSync_StopWriting %p\n", p));

  if (p->csWasEntered)
  {
    /* we don't use the buffer in this thread after StopWriting().
       So we UNLOCK the buffer,
       and we restore the default UNLOCKED state for the stopped thread */
    UNLOCK_BUFFER(p)
  }

  /* We send the (p->stopWriting) message and release freeSemaphore
     to free the current block.
     So the thread will see (p->stopWriting) at some
     iteration after Wait(freeSemaphore).
     The thread doesn't need to fill all available free blocks,
     so we get a fast thread stop.
  */

  p->stopWriting = True;
  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!

    PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
  Event_Wait(&p->wasStopped);
    PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));

  /* 21.03 : we don't restore semaphore counters here.
     We will recreate and reinit the semaphores at the next start */

  p->needStart = True;
}


Z7_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
    PRF(printf("\nMtSync_Destruct %p\n", p));

  if (Thread_WasCreated(&p->thread))
  {
    /* we want the thread to be in Stopped state before sending the EXIT command.
       note: stop(btSync) will stop (htSync) also */
    MtSync_StopWriting(p);
    /* thread is in Stopped state here : (p->needStart == true) */
    p->exit = True;
    // if (p->needStart)  // it's (true)
    Event_Set(&p->canStart);  // we send EXIT command to thread
    Thread_Wait_Close(&p->thread);  // we wait for the thread to finish
  }

  if (p->csWasInitialized)
  {
    CriticalSection_Delete(&p->cs);
    p->csWasInitialized = False;
  }
  p->csWasEntered = False;

  Event_Close(&p->canStart);
  Event_Close(&p->wasStopped);
  Semaphore_Close(&p->freeSemaphore);
  Semaphore_Close(&p->filledSemaphore);

  p->wasCreated = False;
}


// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x)  RINOK_WRes(x)


// call it before each new file (when a new start is required):
Z7_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
  WRes wres;
  // BUFFER_MUST_BE_UNLOCKED(p)
  if (!p->needStart || p->csWasEntered)
    return SZ_ERROR_FAIL;
  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
  if (wres == 0)
    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  WRes wres;

  if (p->wasCreated)
    return SZ_OK;

  RINOK_THREAD(CriticalSection_Init(&p->cs))
  p->csWasInitialized = True;
  p->csWasEntered = False;

  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))

  p->needStart = True;
  p->exit = True;  /* p->exit is unused before the (canStart) Event.
     But in case of some unexpected code failure we will get a fast exit from the thread */

  // return ERROR_TOO_MANY_POSTS; // for debug
  // return EINVAL; // for debug

  if (p->affinity != 0)
    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
  else
    wres = Thread_Create(&p->thread, startAddress, obj);

  RINOK_THREAD(wres)
  p->wasCreated = True;
  return SZ_OK;
}


Z7_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
  if (wres == 0)
    return 0;
  MtSync_Destruct(p);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


// ---------- HASH THREAD ----------

#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses

#ifdef MY_CPU_LE_UNALIGN
  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif

#define GetHeads_DECL(name) \
    static void GetHeads ## name(const Byte *p, UInt32 pos, \
      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)

#define GetHeads_LOOP(v) \
    for (; numHeads != 0; numHeads--) { \
      const UInt32 value = (v); \
      p++; \
      *heads++ = pos - hash[value]; \
      hash[value] = pos++; }

#define DEF_GetHeads2(name, v, action) \
    GetHeads_DECL(name) { action \
    GetHeads_LOOP(v) }

#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)

DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3,  (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.

/*
GetHeads_DECL(3b)
{
  UNUSED_VAR(hashMask);
  UNUSED_VAR(crc);
  {
  const Byte *pLim = p + numHeads;
  if (numHeads == 0)
    return;
  pLim--;
  while (p < pLim)
  {
    UInt32 v1 = GetUi32(p);
    UInt32 v0 = v1 & 0xFFFFFF;
    UInt32 h0, h1;
    p += 2;
    v1 >>= 8;
    h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
    h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
    heads += 2;
  }
  if (p == pLim)
  {
    UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
    *heads = pos - hash[v0];
    hash[v0] = pos;
  }
  }
}
*/

/*
GetHeads_DECL(4)
{
  unsigned sh = 0;
  UNUSED_VAR(crc)
  while ((hashMask & 0x80000000) == 0)
  {
    hashMask <<= 1;
    sh++;
  }
  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/

#define USE_GetHeads_LOCAL_CRC

#ifdef USE_GetHeads_LOCAL_CRC
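
/* These versions copy the CRC table into local tables that are already
   masked (and shifted) with hashMask, so the hot GetHeads_LOOP avoids
   the extra shift/AND per position at the cost of a small per-call setup. */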

GetHeads_DECL(4)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      // crc1[i] = rotlFixed(v, 8) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(4b)
{
  UInt32 crc0[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
      crc0[i] = crc[i] & hashMask;
  }
  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}

GetHeads_DECL(5)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  UInt32 crc2[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(5b)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}

#else

DEF_GetHeads(4,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)

#endif


static void HashThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->hashSync;
    PRF(printf("\nHashThreadFunc\n"));

  for (;;)
  {
    UInt32 blockIndex = 0;
      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
    Event_Wait(&p->canStart);
      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
    if (p->exit)
    {
      PRF(printf("\nHashThreadFunc : exit \n"));
      return;
    }

    MatchFinder_Init_HighHash(MF(mt));

    for (;;)
    {
      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));

      {
        CMatchFinder *mf = MF(mt);
        if (MatchFinder_NeedMove(mf))
        {
          CriticalSection_Enter(&mt->btSync.cs);
          CriticalSection_Enter(&mt->hashSync.cs);
          {
            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
            ptrdiff_t offset;
            MatchFinder_MoveBlock(mf);
            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
            mt->pointerToCurPos -= offset;
            mt->buffer -= offset;
          }
          CriticalSection_Leave(&mt->hashSync.cs);
          CriticalSection_Leave(&mt->btSync.cs);
          continue;
        }

        Semaphore_Wait(&p->freeSemaphore);

        if (p->exit) // exit is unexpected here. But we check it here for some failure case
          return;

        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
        if (p->stopWriting)
          break;

        MatchFinder_ReadIfRequired(mf);
        {
          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
          heads[0] = 2;
          heads[1] = num;

          /* heads[1] contains the number of avail bytes:
             if (avail < mf->numHashBytes) :
             {
               it means that the stream was finished.
               HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
               HASH_THREAD doesn't stop,
               HASH_THREAD fills only the header (2 numbers) for all next blocks:
               {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
             }
             else
             {
               HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
             }
          */

          if (num >= mf->numHashBytes)
          {
            num = num - mf->numHashBytes + 1;
            if (num > kMtHashBlockSize - 2)
              num = kMtHashBlockSize - 2;

            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
            {
              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
              MatchFinder_REDUCE_OFFSETS(mf, subValue)
              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
            }

            heads[0] = 2 + num;
            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
          }

          mf->pos += num;  // wrap over zero is allowed at the end of stream
          mf->buffer += num;
        }
      }

      Semaphore_Release1(&p->filledSemaphore);
    } // for() processing end

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  } // for() thread end
}




// ---------- BT THREAD ----------

/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
   here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug

#define MFMT_GM_INLINE

#ifdef MFMT_GM_INLINE

/*
  we use size_t for (pos) instead of UInt32
  to eliminate "movsx" BUG in old MSVC x64 compiler.
*/


UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
    UInt32 *posRes);

#endif

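/* BT block layout (what BtGetMatches() writes into d[]):
     d[0] = total number of UInt32 items used in the block, including this
            2-item header;
     d[1] = number of available bytes reported by the hash thread;
   then, for each processed position, one record: a count N followed by
   N UInt32 values forming (len, dist) match pairs (N == 0 if no matches).
   The LZ thread consumes these records in MatchFinderMt_GetMatches() and
   the Skip functions. */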
static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
  UInt32 numProcessed = 0;
  UInt32 curPos = 2;

  /* GetMatchesSpec() functions don't create (len = 1)
     in [len, dist] match pairs, if (p->numHashBytes >= 2)
     Also we suppose here that (matchMaxLen >= 2).
     So the following code for (reserve) is not required
     UInt32 reserve = (p->matchMaxLen * 2);
     const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
     if (reserve < kNumHashBytes_Max - 1)
        reserve = kNumHashBytes_Max - 1;
     const UInt32 limit = kMtBtBlockSize - (reserve);
  */

  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);

  d[1] = p->hashNumAvail;

  if (p->failure_BT)
  {
    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
    d[0] = 0;
    // d[1] = 0;
    return;
  }

  while (curPos < limit)
  {
    if (p->hashBufPos == p->hashBufPosLimit)
    {
      // MatchFinderMt_GetNextBlock_Hash(p);
      UInt32 avail;
      {
        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
        const UInt32 *h = p->hashBuf + k;
        avail = h[1];
        p->hashBufPosLimit = k + h[0];
        p->hashNumAvail = avail;
        p->hashBufPos = k + 2;
      }

      {
        /* we must prevent UInt32 overflow for the total avail value,
           if avail was increased with the new hash block */
        UInt32 availSum = numProcessed + avail;
        if (availSum < numProcessed)
          availSum = (UInt32)(Int32)-1;
        d[1] = availSum;
      }

      if (avail >= p->numHashBytes)
        continue;

      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);

      /* (avail < p->numHashBytes)
         It means that the stream was finished,
         and (avail) is the number of remaining bytes.
         we fill (d) for (avail) bytes for LZ_THREAD (receiver).
         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */

      /* here we suppose that we have enough space:
         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
      p->hashNumAvail = 0;
      d[0] = curPos + avail;
      d += curPos;
      for (; avail != 0; avail--)
        *d++ = 0;
      return;
    }
    {
      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
      UInt32 pos = p->pos;
      UInt32 cyclicBufferPos = p->cyclicBufferPos;
      UInt32 lenLimit = p->matchMaxLen;
      if (lenLimit >= p->hashNumAvail)
        lenLimit = p->hashNumAvail;
      {
        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
        if (size2 < size)
          size = size2;
        size2 = p->cyclicBufferSize - cyclicBufferPos;
        if (size2 < size)
          size = size2;
      }

      if (pos > (UInt32)kMtMaxValForNormalize - size)
      {
        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
        pos -= subValue;
        p->pos = pos;
        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
      }

      #ifndef MFMT_GM_INLINE
      while (curPos < limit && size-- != 0)
      {
        UInt32 *startDistances = d + curPos;
        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
            startDistances + 1, p->numHashBytes - 1) - startDistances);
        *startDistances = num - 1;
        curPos += num;
        cyclicBufferPos++;
        pos++;
        p->buffer++;
      }
      #else
      {
        UInt32 posRes = pos;
        const UInt32 *d_end;
        {
          d_end = GetMatchesSpecN_2(
              p->buffer + lenLimit - 1,
              pos, p->buffer, p->son, p->cutValue, d + curPos,
              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
              d + limit, p->hashBuf + p->hashBufPos + size,
              cyclicBufferPos, p->cyclicBufferSize,
              &posRes);
        }
        {
          if (!d_end)
          {
            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
            // internal data failure
            p->failure_BT = True;
            d[0] = 0;
            // d[1] = 0;
            return;
          }
        }
        curPos = (UInt32)(d_end - d);
        {
          const UInt32 processed = posRes - pos;
          pos = posRes;
          p->hashBufPos += processed;
          cyclicBufferPos += processed;
          p->buffer += processed;
        }
      }
      #endif

      {
        const UInt32 processed = pos - p->pos;
        numProcessed += processed;
        p->hashNumAvail -= processed;
        p->pos = pos;
      }
      if (cyclicBufferPos == p->cyclicBufferSize)
        cyclicBufferPos = 0;
      p->cyclicBufferPos = cyclicBufferPos;
    }
  }

  d[0] = curPos;
}


static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
  CMtSync *sync = &p->hashSync;

  BUFFER_MUST_BE_UNLOCKED(sync)

  if (!sync->needStart)
  {
    LOCK_BUFFER(sync)
  }

  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));

  /* We suppose that we have called GetNextBlock() from the start.
     So the buffer is LOCKED */

  UNLOCK_BUFFER(sync)
}


Z7_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->btSync;
  for (;;)
  {
    UInt32 blockIndex = 0;
    Event_Wait(&p->canStart);

    for (;;)
    {
        PRF(printf("  BT thread block = %d  pos = %d\n", (unsigned)blockIndex, mt->pos));
      /* (p->exit == true) is possible after (p->canStart) at first loop iteration
         and is unexpected after more Wait(freeSemaphore) iterations */
      if (p->exit)
        return;

      Semaphore_Wait(&p->freeSemaphore);

      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
      if (p->stopWriting)
        break;

      BtFillBlock(mt, blockIndex++);

      Semaphore_Release1(&p->filledSemaphore);
    }

    // we stop HASH_THREAD here
    MtSync_StopWriting(&mt->hashSync);

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  }
}


void MatchFinderMt_Construct(CMatchFinderMt *p)
{
  p->hashBuf = NULL;
  MtSync_Construct(&p->hashSync);
  MtSync_Construct(&p->btSync);
}

static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hashBuf);
  p->hashBuf = NULL;
}

void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /*
     HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
     So we must be sure that HASH_THREAD will not use CriticalSection(s)
     after deleting CriticalSection here.

     we call ReleaseStream(p)
       that calls StopWriting(btSync)
         that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
     after StopWriting() it's safe to destruct MtSync(s) in any order */

  MatchFinderMt_ReleaseStream(p);

  MtSync_Destruct(&p->btSync);
  MtSync_Destruct(&p->hashSync);

  LOG_ITER(
  printf("\nTree %9d * %7d iter = %9d = sum  :  bytes = %9d\n",
      (UInt32)(g_NumIters_Tree / 1000),
      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
      (UInt32)(g_NumIters_Loop / 1000),
      (UInt32)(g_NumIters_Bytes / 1000)
      ));

  MatchFinderMt_FreeMem(p, alloc);
}


#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)


static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
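  /* Presumably this dummy stack buffer shifts the thread's stack frame
     (e.g. to avoid cache/alignment conflicts between threads); the writes
     and the always-true check keep the compiler from optimizing it away.
     The exact motivation is not documented here. */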
  Byte allocaDummy[0x180];
  unsigned i = 0;
  for (i = 0; i < 16; i++)
    allocaDummy[i] = (Byte)0;
  if (allocaDummy[0] == 0)
    BtThreadFunc((CMatchFinderMt *)p);
  return 0;
}


SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = MF(p);
  p->historySize = historySize;
  if (kMtBtBlockSize <= matchMaxLen * 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;

  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
  return SZ_OK;
}


SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{
  RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
  return MtSync_Init(&p->btSync, kMtBtNumBlocks);
}


static void MatchFinderMt_Init(void *_p)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  CMatchFinder *mf = MF(p);

  p->btBufPos =
  p->btBufPosLimit = NULL;
  p->hashBufPos =
  p->hashBufPosLimit = 0;
  p->hashNumAvail = 0; // 21.03

  p->failure_BT = False;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_4(mf);

  MatchFinder_Init_LowHash(mf);

  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
  p->btNumAvailBytes = 0;
  p->failure_LZ_BT = False;
  // p->failure_LZ_LZ = False;

  p->lzPos =
      1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  p->hash = mf->hash;
  p->fixedHashSize = mf->fixedHashSize;
  // p->hash4Mask = mf->hash4Mask;
  p->crc = mf->crc;
  // memcpy(p->crc, mf->crc, sizeof(mf->crc));

  p->son = mf->son;
  p->matchMaxLen = mf->matchMaxLen;
  p->numHashBytes = mf->numHashBytes;

  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
  // mf->streamPos = mf->pos = 1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  /* we must init (p->pos = mf->pos) for BT, because
     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
  p->pos = mf->pos; // do not change it

  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
  p->cyclicBufferSize = mf->cyclicBufferSize;
  p->buffer = mf->buffer;
  p->cutValue = mf->cutValue;
  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}


/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  // Sleep(1); // for debug
  MtSync_StopWriting(&p->btSync);
  // Sleep(200); // for debug
  /* p->MatchFinder->ReleaseStream(); */
}


Z7_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  if (p->failure_LZ_BT)
    p->btBufPos = p->failureBuf;
  else
  {
    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
    {
      const UInt32 numItems = bt[0];
      p->btBufPosLimit = bt + numItems;
      p->btNumAvailBytes = bt[1];
      p->btBufPos = bt + 2;
      if (numItems < 2 || numItems > kMtBtBlockSize)
      {
        p->failureBuf[0] = 0;
        p->btBufPos = p->failureBuf;
        p->btBufPosLimit = p->failureBuf + 1;
        p->failure_LZ_BT = True;
        // p->btNumAvailBytes = 0;
        /* we don't want to decrease AvailBytes that was loaded before.
           that can be unexpected for the code that has loaded another value before */
      }
    }

    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
    {
      /* we don't check (lzPos) over exact avail bytes in (btBuf).
         (fixedHashSize) is small, so normalization is fast */
      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
      p->lzPos -= subValue;
      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
    }
  }
  return p->btNumAvailBytes;
}



static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  return p->pointerToCurPos;
}


#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);


static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  if (p->btBufPos != p->btBufPosLimit)
    return p->btNumAvailBytes;
  return MatchFinderMt_GetNextBlock_Bt(p);
}


// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True;  return d; }
#define CHECK_FAILURE_LZ(_match_, _pos_)

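/* MixMatches{2,3,4} probe the small fixed hash tables (2-byte and 3-byte
   hashes) at the current position and prepend short matches to (d),
   but only matches at or after (matchMinPos): the caller passes either
   the distance limit of the first BT match or the history window start,
   so the hash matches never duplicate or exceed what BT already found. */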
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, c2;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH2_CALC

  c2 = hash[h2];
  hash[h2] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 2;
      *d++ = m - c2 - 1;
    }
  }

  return d;
}

static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        return d + 2;
      }
      d[0] = 2;
      d += 2;
    }
  }

  if (c3 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c3, m)
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
  }

  return d;
}


#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;

/*
static
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  UInt32 matchMinPos;
  UInt32 avail = p->btNumAvailBytes - 1;
  p->btBufPos = btLim;

  {
    p->btNumAvailBytes = avail;

    #define BT_HASH_BYTES_MAX 5

    matchMinPos = p->lzPos;

    if (len != 0)
      matchMinPos -= bt[1];
    else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
    {
      INCREASE_LZ_POS
      return d;
    }
    else
    {
      const UInt32 hs = p->historySize;
      if (matchMinPos > hs)
        matchMinPos -= hs;
      else
        matchMinPos = 1;
    }
  }

  for (;;)
  {

  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      d[0] = 3;
      d += 2;
      break;
    }
    // else
    {
      d[0] = 2;
      d += 2;
    }
  }
  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    *d++ = 3;
    *d++ = m - c3 - 1;
  }
  break;
  }

  if (len != 0)
  {
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (bt != btLim);
  }
  INCREASE_LZ_POS
  return d;
}
*/


static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC
  // MT_HASH4_CALC
  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];
  // c4 = (hash + kFix4HashSize)[h4];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;
  // (hash + kFix4HashSize)[h4] = m;

  // #define BT5_USE_H2
  // #ifdef BT5_USE_H2
  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
      // return d + 2;

      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
      {
        d[0] = 4;
        return d + 2;
      }
      d[0] = 3;
      d += 2;

      #ifdef BT5_USE_H4
      if (c4 >= matchMinPos)
        if (
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
          )
      {
        *d++ = 4;
        *d++ = m - c4 - 1;
      }
      #endif
      return d;
    }
    d[0] = 2;
    d += 2;
  }
  // #endif

  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c3 - 1;
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
    {
      d[0] = 4;
      return d + 2;
    }
    d[0] = 3;
    d += 2;
  }

  #ifdef BT5_USE_H4
  if (c4 >= matchMinPos)
    if (
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
      )
    {
      *d++ = 4;
      *d++ = m - c4 - 1;
    }
  #endif

  return d;
}


static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  p->btBufPos = btLim;
  p->btNumAvailBytes--;
  INCREASE_LZ_POS
  {
    while (bt != btLim)
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
  }
  return d;
}

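/* GetMatches() output format in (d): a sequence of (len, dist) pairs,
   where dist is (delta - 1). Hash matches (len 2..4) produced by
   MixMatchesFunc come first, followed by the BT matches copied from
   the block record. */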
static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  const UInt32 *bt = p->btBufPos;
  UInt32 len = *bt++;
  const UInt32 avail = p->btNumAvailBytes - 1;
  p->btNumAvailBytes = avail;
  p->btBufPos = bt + len;
  if (len == 0)
  {
    #define BT_HASH_BYTES_MAX 5
    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
    {
      UInt32 m = p->lzPos;
      if (m > p->historySize)
        m -= p->historySize;
      else
        m = 1;
      d = p->MixMatchesFunc(p, m, d);
    }
  }
  else
  {
    /*
      first match pair from BinTree: (match_len, match_dist),
      (match_len >= numHashBytes).
      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
    */
    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
    // if (d) // check for failure
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (len -= 2);
  }
  INCREASE_LZ_POS
  return d;
}

#define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
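
/* The Skip macros walk one position-record of the BT block per iteration:
   SKIP_HEADER_MT(n) fetches the next block if needed and opens a scope
   where the fixed hash tables are updated (only when at least (n) bytes
   remain); SKIP_FOOTER_MT then advances btBufPos past the record
   (its count word plus that many match items). */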

static void MatchFinderMt0_Skip(void *_p, UInt32 num)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  SKIP_HEADER2_MT { p->btNumAvailBytes--;
  SKIP_FOOTER_MT
}

static void MatchFinderMt2_Skip(void *_p, UInt32 num)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  SKIP_HEADER_MT(2)
      UInt32 h2;
      MT_HASH2_CALC
      hash[h2] = p->lzPos;
  SKIP_FOOTER_MT
}

static void MatchFinderMt3_Skip(void *_p, UInt32 num)
{
  CMatchFinderMt *p = (CMatchFinderMt *)_p;
  SKIP_HEADER_MT(3)
      UInt32 h2, h3;
      MT_HASH3_CALC
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}

/*
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
// The difference is that MatchFinderMt3_Skip() updates the hash for the last 3 bytes of the stream.

static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(4)
      UInt32 h2, h3; // h4
      MT_HASH3_CALC
      // MT_HASH4_CALC
      // (hash + kFix4HashSize)[h4] =
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}
*/

void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
  vTable->Init = MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos;
  vTable->GetMatches = MatchFinderMt_GetMatches;

  switch (MF(p)->numHashBytes)
  {
    case 2:
      p->GetHeadsFunc = GetHeads2;
      p->MixMatchesFunc = NULL;
      vTable->Skip = MatchFinderMt0_Skip;
      vTable->GetMatches = MatchFinderMt2_GetMatches;
      break;
    case 3:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
      p->MixMatchesFunc = MixMatches2;
      vTable->Skip = MatchFinderMt2_Skip;
      break;
    case 4:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;

      // it's fast inline version of GetMatches()
      // vTable->GetMatches = MatchFinderMt_GetMatches_Bt4;

      p->MixMatchesFunc = MixMatches3;
      vTable->Skip = MatchFinderMt3_Skip;
      break;
    default:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
      p->MixMatchesFunc = MixMatches4;
      vTable->Skip =
          MatchFinderMt3_Skip;
          // MatchFinderMt4_Skip;
      break;
  }
}

#undef RINOK_THREAD
#undef PRF
#undef MF
#undef GetUi24hi_from32
#undef LOCK_BUFFER
#undef UNLOCK_BUFFER