1*f6dc9357SAndroid Build Coastguard Worker /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2*f6dc9357SAndroid Build Coastguard Worker 2023-04-02 : Igor Pavlov : Public domain */
3*f6dc9357SAndroid Build Coastguard Worker
4*f6dc9357SAndroid Build Coastguard Worker #include "Precomp.h"
5*f6dc9357SAndroid Build Coastguard Worker
6*f6dc9357SAndroid Build Coastguard Worker #include "CpuArch.h"
7*f6dc9357SAndroid Build Coastguard Worker #include "LzFind.h"
8*f6dc9357SAndroid Build Coastguard Worker
9*f6dc9357SAndroid Build Coastguard Worker // #include "LzFindMt.h"
10*f6dc9357SAndroid Build Coastguard Worker
11*f6dc9357SAndroid Build Coastguard Worker // #define LOG_ITERS
12*f6dc9357SAndroid Build Coastguard Worker
13*f6dc9357SAndroid Build Coastguard Worker // #define LOG_THREAD
14*f6dc9357SAndroid Build Coastguard Worker
15*f6dc9357SAndroid Build Coastguard Worker #ifdef LOG_THREAD
16*f6dc9357SAndroid Build Coastguard Worker #include <stdio.h>
17*f6dc9357SAndroid Build Coastguard Worker #define PRF(x) x
18*f6dc9357SAndroid Build Coastguard Worker #else
19*f6dc9357SAndroid Build Coastguard Worker // #define PRF(x)
20*f6dc9357SAndroid Build Coastguard Worker #endif
21*f6dc9357SAndroid Build Coastguard Worker
22*f6dc9357SAndroid Build Coastguard Worker #ifdef LOG_ITERS
23*f6dc9357SAndroid Build Coastguard Worker #include <stdio.h>
24*f6dc9357SAndroid Build Coastguard Worker UInt64 g_NumIters_Tree;
25*f6dc9357SAndroid Build Coastguard Worker UInt64 g_NumIters_Loop;
26*f6dc9357SAndroid Build Coastguard Worker UInt64 g_NumIters_Bytes;
27*f6dc9357SAndroid Build Coastguard Worker #define LOG_ITER(x) x
28*f6dc9357SAndroid Build Coastguard Worker #else
29*f6dc9357SAndroid Build Coastguard Worker #define LOG_ITER(x)
30*f6dc9357SAndroid Build Coastguard Worker #endif
31*f6dc9357SAndroid Build Coastguard Worker
32*f6dc9357SAndroid Build Coastguard Worker // ---------- BT THREAD ----------
33*f6dc9357SAndroid Build Coastguard Worker
34*f6dc9357SAndroid Build Coastguard Worker #define USE_SON_PREFETCH
35*f6dc9357SAndroid Build Coastguard Worker #define USE_LONG_MATCH_OPT
36*f6dc9357SAndroid Build Coastguard Worker
37*f6dc9357SAndroid Build Coastguard Worker #define kEmptyHashValue 0
38*f6dc9357SAndroid Build Coastguard Worker
39*f6dc9357SAndroid Build Coastguard Worker // #define CYC_TO_POS_OFFSET 0
40*f6dc9357SAndroid Build Coastguard Worker
41*f6dc9357SAndroid Build Coastguard Worker // #define CYC_TO_POS_OFFSET 1 // for debug
42*f6dc9357SAndroid Build Coastguard Worker
43*f6dc9357SAndroid Build Coastguard Worker /*
44*f6dc9357SAndroid Build Coastguard Worker Z7_NO_INLINE
45*f6dc9357SAndroid Build Coastguard Worker UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
46*f6dc9357SAndroid Build Coastguard Worker UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
47*f6dc9357SAndroid Build Coastguard Worker {
48*f6dc9357SAndroid Build Coastguard Worker do
49*f6dc9357SAndroid Build Coastguard Worker {
50*f6dc9357SAndroid Build Coastguard Worker UInt32 delta;
51*f6dc9357SAndroid Build Coastguard Worker if (hash == size)
52*f6dc9357SAndroid Build Coastguard Worker break;
53*f6dc9357SAndroid Build Coastguard Worker delta = *hash++;
54*f6dc9357SAndroid Build Coastguard Worker
55*f6dc9357SAndroid Build Coastguard Worker if (delta == 0 || delta > (UInt32)pos)
56*f6dc9357SAndroid Build Coastguard Worker return NULL;
57*f6dc9357SAndroid Build Coastguard Worker
58*f6dc9357SAndroid Build Coastguard Worker lenLimit++;
59*f6dc9357SAndroid Build Coastguard Worker
60*f6dc9357SAndroid Build Coastguard Worker if (delta == (UInt32)pos)
61*f6dc9357SAndroid Build Coastguard Worker {
62*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
63*f6dc9357SAndroid Build Coastguard Worker *d++ = 0;
64*f6dc9357SAndroid Build Coastguard Worker ptr1[0] = kEmptyHashValue;
65*f6dc9357SAndroid Build Coastguard Worker ptr1[1] = kEmptyHashValue;
66*f6dc9357SAndroid Build Coastguard Worker }
67*f6dc9357SAndroid Build Coastguard Worker else
68*f6dc9357SAndroid Build Coastguard Worker {
69*f6dc9357SAndroid Build Coastguard Worker UInt32 *_distances = ++d;
70*f6dc9357SAndroid Build Coastguard Worker
71*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
72*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
73*f6dc9357SAndroid Build Coastguard Worker
74*f6dc9357SAndroid Build Coastguard Worker const Byte *len0 = cur, *len1 = cur;
75*f6dc9357SAndroid Build Coastguard Worker UInt32 cutValue = _cutValue;
76*f6dc9357SAndroid Build Coastguard Worker const Byte *maxLen = cur + _maxLen;
77*f6dc9357SAndroid Build Coastguard Worker
78*f6dc9357SAndroid Build Coastguard Worker for (LOG_ITER(g_NumIters_Tree++);;)
79*f6dc9357SAndroid Build Coastguard Worker {
80*f6dc9357SAndroid Build Coastguard Worker LOG_ITER(g_NumIters_Loop++);
81*f6dc9357SAndroid Build Coastguard Worker {
82*f6dc9357SAndroid Build Coastguard Worker const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
83*f6dc9357SAndroid Build Coastguard Worker CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
84*f6dc9357SAndroid Build Coastguard Worker const Byte *len = (len0 < len1 ? len0 : len1);
85*f6dc9357SAndroid Build Coastguard Worker
86*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_SON_PREFETCH
87*f6dc9357SAndroid Build Coastguard Worker const UInt32 pair0 = *pair;
88*f6dc9357SAndroid Build Coastguard Worker #endif
89*f6dc9357SAndroid Build Coastguard Worker
90*f6dc9357SAndroid Build Coastguard Worker if (len[diff] == len[0])
91*f6dc9357SAndroid Build Coastguard Worker {
92*f6dc9357SAndroid Build Coastguard Worker if (++len != lenLimit && len[diff] == len[0])
93*f6dc9357SAndroid Build Coastguard Worker while (++len != lenLimit)
94*f6dc9357SAndroid Build Coastguard Worker {
95*f6dc9357SAndroid Build Coastguard Worker LOG_ITER(g_NumIters_Bytes++);
96*f6dc9357SAndroid Build Coastguard Worker if (len[diff] != len[0])
97*f6dc9357SAndroid Build Coastguard Worker break;
98*f6dc9357SAndroid Build Coastguard Worker }
99*f6dc9357SAndroid Build Coastguard Worker if (maxLen < len)
100*f6dc9357SAndroid Build Coastguard Worker {
101*f6dc9357SAndroid Build Coastguard Worker maxLen = len;
102*f6dc9357SAndroid Build Coastguard Worker *d++ = (UInt32)(len - cur);
103*f6dc9357SAndroid Build Coastguard Worker *d++ = delta - 1;
104*f6dc9357SAndroid Build Coastguard Worker
105*f6dc9357SAndroid Build Coastguard Worker if (len == lenLimit)
106*f6dc9357SAndroid Build Coastguard Worker {
107*f6dc9357SAndroid Build Coastguard Worker const UInt32 pair1 = pair[1];
108*f6dc9357SAndroid Build Coastguard Worker *ptr1 =
109*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_SON_PREFETCH
110*f6dc9357SAndroid Build Coastguard Worker pair0;
111*f6dc9357SAndroid Build Coastguard Worker #else
112*f6dc9357SAndroid Build Coastguard Worker pair[0];
113*f6dc9357SAndroid Build Coastguard Worker #endif
114*f6dc9357SAndroid Build Coastguard Worker *ptr0 = pair1;
115*f6dc9357SAndroid Build Coastguard Worker
116*f6dc9357SAndroid Build Coastguard Worker _distances[-1] = (UInt32)(d - _distances);
117*f6dc9357SAndroid Build Coastguard Worker
118*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_LONG_MATCH_OPT
119*f6dc9357SAndroid Build Coastguard Worker
120*f6dc9357SAndroid Build Coastguard Worker if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
121*f6dc9357SAndroid Build Coastguard Worker break;
122*f6dc9357SAndroid Build Coastguard Worker
123*f6dc9357SAndroid Build Coastguard Worker {
124*f6dc9357SAndroid Build Coastguard Worker for (;;)
125*f6dc9357SAndroid Build Coastguard Worker {
126*f6dc9357SAndroid Build Coastguard Worker hash++;
127*f6dc9357SAndroid Build Coastguard Worker pos++;
128*f6dc9357SAndroid Build Coastguard Worker cur++;
129*f6dc9357SAndroid Build Coastguard Worker lenLimit++;
130*f6dc9357SAndroid Build Coastguard Worker {
131*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
132*f6dc9357SAndroid Build Coastguard Worker #if 0
133*f6dc9357SAndroid Build Coastguard Worker *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
134*f6dc9357SAndroid Build Coastguard Worker #else
135*f6dc9357SAndroid Build Coastguard Worker const UInt32 p0 = ptr[0 + (diff * 2)];
136*f6dc9357SAndroid Build Coastguard Worker const UInt32 p1 = ptr[1 + (diff * 2)];
137*f6dc9357SAndroid Build Coastguard Worker ptr[0] = p0;
138*f6dc9357SAndroid Build Coastguard Worker ptr[1] = p1;
139*f6dc9357SAndroid Build Coastguard Worker // ptr[0] = ptr[0 + (diff * 2)];
140*f6dc9357SAndroid Build Coastguard Worker // ptr[1] = ptr[1 + (diff * 2)];
141*f6dc9357SAndroid Build Coastguard Worker #endif
142*f6dc9357SAndroid Build Coastguard Worker }
143*f6dc9357SAndroid Build Coastguard Worker // PrintSon(son + 2, pos - 1);
144*f6dc9357SAndroid Build Coastguard Worker // printf("\npos = %x delta = %x\n", pos, delta);
145*f6dc9357SAndroid Build Coastguard Worker len++;
146*f6dc9357SAndroid Build Coastguard Worker *d++ = 2;
147*f6dc9357SAndroid Build Coastguard Worker *d++ = (UInt32)(len - cur);
148*f6dc9357SAndroid Build Coastguard Worker *d++ = delta - 1;
149*f6dc9357SAndroid Build Coastguard Worker if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
150*f6dc9357SAndroid Build Coastguard Worker break;
151*f6dc9357SAndroid Build Coastguard Worker }
152*f6dc9357SAndroid Build Coastguard Worker }
153*f6dc9357SAndroid Build Coastguard Worker #endif
154*f6dc9357SAndroid Build Coastguard Worker
155*f6dc9357SAndroid Build Coastguard Worker break;
156*f6dc9357SAndroid Build Coastguard Worker }
157*f6dc9357SAndroid Build Coastguard Worker }
158*f6dc9357SAndroid Build Coastguard Worker }
159*f6dc9357SAndroid Build Coastguard Worker
160*f6dc9357SAndroid Build Coastguard Worker {
161*f6dc9357SAndroid Build Coastguard Worker const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
162*f6dc9357SAndroid Build Coastguard Worker if (len[diff] < len[0])
163*f6dc9357SAndroid Build Coastguard Worker {
164*f6dc9357SAndroid Build Coastguard Worker delta = pair[1];
165*f6dc9357SAndroid Build Coastguard Worker if (delta >= curMatch)
166*f6dc9357SAndroid Build Coastguard Worker return NULL;
167*f6dc9357SAndroid Build Coastguard Worker *ptr1 = curMatch;
168*f6dc9357SAndroid Build Coastguard Worker ptr1 = pair + 1;
169*f6dc9357SAndroid Build Coastguard Worker len1 = len;
170*f6dc9357SAndroid Build Coastguard Worker }
171*f6dc9357SAndroid Build Coastguard Worker else
172*f6dc9357SAndroid Build Coastguard Worker {
173*f6dc9357SAndroid Build Coastguard Worker delta = *pair;
174*f6dc9357SAndroid Build Coastguard Worker if (delta >= curMatch)
175*f6dc9357SAndroid Build Coastguard Worker return NULL;
176*f6dc9357SAndroid Build Coastguard Worker *ptr0 = curMatch;
177*f6dc9357SAndroid Build Coastguard Worker ptr0 = pair;
178*f6dc9357SAndroid Build Coastguard Worker len0 = len;
179*f6dc9357SAndroid Build Coastguard Worker }
180*f6dc9357SAndroid Build Coastguard Worker
181*f6dc9357SAndroid Build Coastguard Worker delta = (UInt32)pos - delta;
182*f6dc9357SAndroid Build Coastguard Worker
183*f6dc9357SAndroid Build Coastguard Worker if (--cutValue == 0 || delta >= pos)
184*f6dc9357SAndroid Build Coastguard Worker {
185*f6dc9357SAndroid Build Coastguard Worker *ptr0 = *ptr1 = kEmptyHashValue;
186*f6dc9357SAndroid Build Coastguard Worker _distances[-1] = (UInt32)(d - _distances);
187*f6dc9357SAndroid Build Coastguard Worker break;
188*f6dc9357SAndroid Build Coastguard Worker }
189*f6dc9357SAndroid Build Coastguard Worker }
190*f6dc9357SAndroid Build Coastguard Worker }
191*f6dc9357SAndroid Build Coastguard Worker } // for (tree iterations)
192*f6dc9357SAndroid Build Coastguard Worker }
193*f6dc9357SAndroid Build Coastguard Worker pos++;
194*f6dc9357SAndroid Build Coastguard Worker cur++;
195*f6dc9357SAndroid Build Coastguard Worker }
196*f6dc9357SAndroid Build Coastguard Worker while (d < limit);
197*f6dc9357SAndroid Build Coastguard Worker *posRes = (UInt32)pos;
198*f6dc9357SAndroid Build Coastguard Worker return d;
199*f6dc9357SAndroid Build Coastguard Worker }
200*f6dc9357SAndroid Build Coastguard Worker */
201*f6dc9357SAndroid Build Coastguard Worker
202*f6dc9357SAndroid Build Coastguard Worker /* define cbs if you use 2 functions.
203*f6dc9357SAndroid Build Coastguard Worker GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
204*f6dc9357SAndroid Build Coastguard Worker GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
205*f6dc9357SAndroid Build Coastguard Worker
206*f6dc9357SAndroid Build Coastguard Worker do not define cbs if you use 1 function:
207*f6dc9357SAndroid Build Coastguard Worker GetMatchesSpecN_2()
208*f6dc9357SAndroid Build Coastguard Worker */
209*f6dc9357SAndroid Build Coastguard Worker
210*f6dc9357SAndroid Build Coastguard Worker // #define cbs _cyclicBufferSize
211*f6dc9357SAndroid Build Coastguard Worker
212*f6dc9357SAndroid Build Coastguard Worker /*
213*f6dc9357SAndroid Build Coastguard Worker we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
214*f6dc9357SAndroid Build Coastguard Worker to eliminate "movsx" BUG in old MSVC x64 compiler.
215*f6dc9357SAndroid Build Coastguard Worker */
216*f6dc9357SAndroid Build Coastguard Worker
217*f6dc9357SAndroid Build Coastguard Worker UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
218*f6dc9357SAndroid Build Coastguard Worker UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
219*f6dc9357SAndroid Build Coastguard Worker size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
220*f6dc9357SAndroid Build Coastguard Worker UInt32 *posRes);
221*f6dc9357SAndroid Build Coastguard Worker
222*f6dc9357SAndroid Build Coastguard Worker Z7_NO_INLINE
GetMatchesSpecN_2(const Byte * lenLimit,size_t pos,const Byte * cur,CLzRef * son,UInt32 _cutValue,UInt32 * d,size_t _maxLen,const UInt32 * hash,const UInt32 * limit,const UInt32 * size,size_t _cyclicBufferPos,UInt32 _cyclicBufferSize,UInt32 * posRes)223*f6dc9357SAndroid Build Coastguard Worker UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
224*f6dc9357SAndroid Build Coastguard Worker UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
225*f6dc9357SAndroid Build Coastguard Worker size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
226*f6dc9357SAndroid Build Coastguard Worker UInt32 *posRes)
227*f6dc9357SAndroid Build Coastguard Worker {
228*f6dc9357SAndroid Build Coastguard Worker do // while (hash != size)
229*f6dc9357SAndroid Build Coastguard Worker {
230*f6dc9357SAndroid Build Coastguard Worker UInt32 delta;
231*f6dc9357SAndroid Build Coastguard Worker
232*f6dc9357SAndroid Build Coastguard Worker #ifndef cbs
233*f6dc9357SAndroid Build Coastguard Worker UInt32 cbs;
234*f6dc9357SAndroid Build Coastguard Worker #endif
235*f6dc9357SAndroid Build Coastguard Worker
236*f6dc9357SAndroid Build Coastguard Worker if (hash == size)
237*f6dc9357SAndroid Build Coastguard Worker break;
238*f6dc9357SAndroid Build Coastguard Worker
239*f6dc9357SAndroid Build Coastguard Worker delta = *hash++;
240*f6dc9357SAndroid Build Coastguard Worker
241*f6dc9357SAndroid Build Coastguard Worker if (delta == 0)
242*f6dc9357SAndroid Build Coastguard Worker return NULL;
243*f6dc9357SAndroid Build Coastguard Worker
244*f6dc9357SAndroid Build Coastguard Worker lenLimit++;
245*f6dc9357SAndroid Build Coastguard Worker
246*f6dc9357SAndroid Build Coastguard Worker #ifndef cbs
247*f6dc9357SAndroid Build Coastguard Worker cbs = _cyclicBufferSize;
248*f6dc9357SAndroid Build Coastguard Worker if ((UInt32)pos < cbs)
249*f6dc9357SAndroid Build Coastguard Worker {
250*f6dc9357SAndroid Build Coastguard Worker if (delta > (UInt32)pos)
251*f6dc9357SAndroid Build Coastguard Worker return NULL;
252*f6dc9357SAndroid Build Coastguard Worker cbs = (UInt32)pos;
253*f6dc9357SAndroid Build Coastguard Worker }
254*f6dc9357SAndroid Build Coastguard Worker #endif
255*f6dc9357SAndroid Build Coastguard Worker
256*f6dc9357SAndroid Build Coastguard Worker if (delta >= cbs)
257*f6dc9357SAndroid Build Coastguard Worker {
258*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
259*f6dc9357SAndroid Build Coastguard Worker *d++ = 0;
260*f6dc9357SAndroid Build Coastguard Worker ptr1[0] = kEmptyHashValue;
261*f6dc9357SAndroid Build Coastguard Worker ptr1[1] = kEmptyHashValue;
262*f6dc9357SAndroid Build Coastguard Worker }
263*f6dc9357SAndroid Build Coastguard Worker else
264*f6dc9357SAndroid Build Coastguard Worker {
265*f6dc9357SAndroid Build Coastguard Worker UInt32 *_distances = ++d;
266*f6dc9357SAndroid Build Coastguard Worker
267*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
268*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
269*f6dc9357SAndroid Build Coastguard Worker
270*f6dc9357SAndroid Build Coastguard Worker UInt32 cutValue = _cutValue;
271*f6dc9357SAndroid Build Coastguard Worker const Byte *len0 = cur, *len1 = cur;
272*f6dc9357SAndroid Build Coastguard Worker const Byte *maxLen = cur + _maxLen;
273*f6dc9357SAndroid Build Coastguard Worker
274*f6dc9357SAndroid Build Coastguard Worker // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
275*f6dc9357SAndroid Build Coastguard Worker for (LOG_ITER(g_NumIters_Tree++);;)
276*f6dc9357SAndroid Build Coastguard Worker {
277*f6dc9357SAndroid Build Coastguard Worker LOG_ITER(g_NumIters_Loop++);
278*f6dc9357SAndroid Build Coastguard Worker {
279*f6dc9357SAndroid Build Coastguard Worker // SPEC code
280*f6dc9357SAndroid Build Coastguard Worker CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
281*f6dc9357SAndroid Build Coastguard Worker + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
282*f6dc9357SAndroid Build Coastguard Worker ) << 1);
283*f6dc9357SAndroid Build Coastguard Worker
284*f6dc9357SAndroid Build Coastguard Worker const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
285*f6dc9357SAndroid Build Coastguard Worker const Byte *len = (len0 < len1 ? len0 : len1);
286*f6dc9357SAndroid Build Coastguard Worker
287*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_SON_PREFETCH
288*f6dc9357SAndroid Build Coastguard Worker const UInt32 pair0 = *pair;
289*f6dc9357SAndroid Build Coastguard Worker #endif
290*f6dc9357SAndroid Build Coastguard Worker
291*f6dc9357SAndroid Build Coastguard Worker if (len[diff] == len[0])
292*f6dc9357SAndroid Build Coastguard Worker {
293*f6dc9357SAndroid Build Coastguard Worker if (++len != lenLimit && len[diff] == len[0])
294*f6dc9357SAndroid Build Coastguard Worker while (++len != lenLimit)
295*f6dc9357SAndroid Build Coastguard Worker {
296*f6dc9357SAndroid Build Coastguard Worker LOG_ITER(g_NumIters_Bytes++);
297*f6dc9357SAndroid Build Coastguard Worker if (len[diff] != len[0])
298*f6dc9357SAndroid Build Coastguard Worker break;
299*f6dc9357SAndroid Build Coastguard Worker }
300*f6dc9357SAndroid Build Coastguard Worker if (maxLen < len)
301*f6dc9357SAndroid Build Coastguard Worker {
302*f6dc9357SAndroid Build Coastguard Worker maxLen = len;
303*f6dc9357SAndroid Build Coastguard Worker *d++ = (UInt32)(len - cur);
304*f6dc9357SAndroid Build Coastguard Worker *d++ = delta - 1;
305*f6dc9357SAndroid Build Coastguard Worker
306*f6dc9357SAndroid Build Coastguard Worker if (len == lenLimit)
307*f6dc9357SAndroid Build Coastguard Worker {
308*f6dc9357SAndroid Build Coastguard Worker const UInt32 pair1 = pair[1];
309*f6dc9357SAndroid Build Coastguard Worker *ptr1 =
310*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_SON_PREFETCH
311*f6dc9357SAndroid Build Coastguard Worker pair0;
312*f6dc9357SAndroid Build Coastguard Worker #else
313*f6dc9357SAndroid Build Coastguard Worker pair[0];
314*f6dc9357SAndroid Build Coastguard Worker #endif
315*f6dc9357SAndroid Build Coastguard Worker *ptr0 = pair1;
316*f6dc9357SAndroid Build Coastguard Worker
317*f6dc9357SAndroid Build Coastguard Worker _distances[-1] = (UInt32)(d - _distances);
318*f6dc9357SAndroid Build Coastguard Worker
319*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_LONG_MATCH_OPT
320*f6dc9357SAndroid Build Coastguard Worker
321*f6dc9357SAndroid Build Coastguard Worker if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
322*f6dc9357SAndroid Build Coastguard Worker break;
323*f6dc9357SAndroid Build Coastguard Worker
324*f6dc9357SAndroid Build Coastguard Worker {
325*f6dc9357SAndroid Build Coastguard Worker for (;;)
326*f6dc9357SAndroid Build Coastguard Worker {
327*f6dc9357SAndroid Build Coastguard Worker *d++ = 2;
328*f6dc9357SAndroid Build Coastguard Worker *d++ = (UInt32)(lenLimit - cur);
329*f6dc9357SAndroid Build Coastguard Worker *d++ = delta - 1;
330*f6dc9357SAndroid Build Coastguard Worker cur++;
331*f6dc9357SAndroid Build Coastguard Worker lenLimit++;
332*f6dc9357SAndroid Build Coastguard Worker // SPEC
333*f6dc9357SAndroid Build Coastguard Worker _cyclicBufferPos++;
334*f6dc9357SAndroid Build Coastguard Worker {
335*f6dc9357SAndroid Build Coastguard Worker // SPEC code
336*f6dc9357SAndroid Build Coastguard Worker CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
337*f6dc9357SAndroid Build Coastguard Worker const CLzRef *src = dest + ((diff
338*f6dc9357SAndroid Build Coastguard Worker + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
339*f6dc9357SAndroid Build Coastguard Worker // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
340*f6dc9357SAndroid Build Coastguard Worker #if 0
341*f6dc9357SAndroid Build Coastguard Worker *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
342*f6dc9357SAndroid Build Coastguard Worker #else
343*f6dc9357SAndroid Build Coastguard Worker const UInt32 p0 = src[0];
344*f6dc9357SAndroid Build Coastguard Worker const UInt32 p1 = src[1];
345*f6dc9357SAndroid Build Coastguard Worker dest[0] = p0;
346*f6dc9357SAndroid Build Coastguard Worker dest[1] = p1;
347*f6dc9357SAndroid Build Coastguard Worker #endif
348*f6dc9357SAndroid Build Coastguard Worker }
349*f6dc9357SAndroid Build Coastguard Worker pos++;
350*f6dc9357SAndroid Build Coastguard Worker hash++;
351*f6dc9357SAndroid Build Coastguard Worker if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
352*f6dc9357SAndroid Build Coastguard Worker break;
353*f6dc9357SAndroid Build Coastguard Worker } // for() end for long matches
354*f6dc9357SAndroid Build Coastguard Worker }
355*f6dc9357SAndroid Build Coastguard Worker #endif
356*f6dc9357SAndroid Build Coastguard Worker
357*f6dc9357SAndroid Build Coastguard Worker break; // break from TREE iterations
358*f6dc9357SAndroid Build Coastguard Worker }
359*f6dc9357SAndroid Build Coastguard Worker }
360*f6dc9357SAndroid Build Coastguard Worker }
361*f6dc9357SAndroid Build Coastguard Worker {
362*f6dc9357SAndroid Build Coastguard Worker const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
363*f6dc9357SAndroid Build Coastguard Worker if (len[diff] < len[0])
364*f6dc9357SAndroid Build Coastguard Worker {
365*f6dc9357SAndroid Build Coastguard Worker delta = pair[1];
366*f6dc9357SAndroid Build Coastguard Worker *ptr1 = curMatch;
367*f6dc9357SAndroid Build Coastguard Worker ptr1 = pair + 1;
368*f6dc9357SAndroid Build Coastguard Worker len1 = len;
369*f6dc9357SAndroid Build Coastguard Worker if (delta >= curMatch)
370*f6dc9357SAndroid Build Coastguard Worker return NULL;
371*f6dc9357SAndroid Build Coastguard Worker }
372*f6dc9357SAndroid Build Coastguard Worker else
373*f6dc9357SAndroid Build Coastguard Worker {
374*f6dc9357SAndroid Build Coastguard Worker delta = *pair;
375*f6dc9357SAndroid Build Coastguard Worker *ptr0 = curMatch;
376*f6dc9357SAndroid Build Coastguard Worker ptr0 = pair;
377*f6dc9357SAndroid Build Coastguard Worker len0 = len;
378*f6dc9357SAndroid Build Coastguard Worker if (delta >= curMatch)
379*f6dc9357SAndroid Build Coastguard Worker return NULL;
380*f6dc9357SAndroid Build Coastguard Worker }
381*f6dc9357SAndroid Build Coastguard Worker delta = (UInt32)pos - delta;
382*f6dc9357SAndroid Build Coastguard Worker
383*f6dc9357SAndroid Build Coastguard Worker if (--cutValue == 0 || delta >= cbs)
384*f6dc9357SAndroid Build Coastguard Worker {
385*f6dc9357SAndroid Build Coastguard Worker *ptr0 = *ptr1 = kEmptyHashValue;
386*f6dc9357SAndroid Build Coastguard Worker _distances[-1] = (UInt32)(d - _distances);
387*f6dc9357SAndroid Build Coastguard Worker break;
388*f6dc9357SAndroid Build Coastguard Worker }
389*f6dc9357SAndroid Build Coastguard Worker }
390*f6dc9357SAndroid Build Coastguard Worker }
391*f6dc9357SAndroid Build Coastguard Worker } // for (tree iterations)
392*f6dc9357SAndroid Build Coastguard Worker }
393*f6dc9357SAndroid Build Coastguard Worker pos++;
394*f6dc9357SAndroid Build Coastguard Worker _cyclicBufferPos++;
395*f6dc9357SAndroid Build Coastguard Worker cur++;
396*f6dc9357SAndroid Build Coastguard Worker }
397*f6dc9357SAndroid Build Coastguard Worker while (d < limit);
398*f6dc9357SAndroid Build Coastguard Worker *posRes = (UInt32)pos;
399*f6dc9357SAndroid Build Coastguard Worker return d;
400*f6dc9357SAndroid Build Coastguard Worker }
401*f6dc9357SAndroid Build Coastguard Worker
402*f6dc9357SAndroid Build Coastguard Worker
403*f6dc9357SAndroid Build Coastguard Worker
404*f6dc9357SAndroid Build Coastguard Worker /*
405*f6dc9357SAndroid Build Coastguard Worker typedef UInt32 uint32plus; // size_t
406*f6dc9357SAndroid Build Coastguard Worker
407*f6dc9357SAndroid Build Coastguard Worker UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
408*f6dc9357SAndroid Build Coastguard Worker UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
409*f6dc9357SAndroid Build Coastguard Worker size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
410*f6dc9357SAndroid Build Coastguard Worker UInt32 *posRes)
411*f6dc9357SAndroid Build Coastguard Worker {
412*f6dc9357SAndroid Build Coastguard Worker do // while (hash != size)
413*f6dc9357SAndroid Build Coastguard Worker {
414*f6dc9357SAndroid Build Coastguard Worker UInt32 delta;
415*f6dc9357SAndroid Build Coastguard Worker
416*f6dc9357SAndroid Build Coastguard Worker #ifndef cbs
417*f6dc9357SAndroid Build Coastguard Worker UInt32 cbs;
418*f6dc9357SAndroid Build Coastguard Worker #endif
419*f6dc9357SAndroid Build Coastguard Worker
420*f6dc9357SAndroid Build Coastguard Worker if (hash == size)
421*f6dc9357SAndroid Build Coastguard Worker break;
422*f6dc9357SAndroid Build Coastguard Worker
423*f6dc9357SAndroid Build Coastguard Worker delta = *hash++;
424*f6dc9357SAndroid Build Coastguard Worker
425*f6dc9357SAndroid Build Coastguard Worker if (delta == 0)
426*f6dc9357SAndroid Build Coastguard Worker return NULL;
427*f6dc9357SAndroid Build Coastguard Worker
428*f6dc9357SAndroid Build Coastguard Worker #ifndef cbs
429*f6dc9357SAndroid Build Coastguard Worker cbs = _cyclicBufferSize;
430*f6dc9357SAndroid Build Coastguard Worker if ((UInt32)pos < cbs)
431*f6dc9357SAndroid Build Coastguard Worker {
432*f6dc9357SAndroid Build Coastguard Worker if (delta > (UInt32)pos)
433*f6dc9357SAndroid Build Coastguard Worker return NULL;
434*f6dc9357SAndroid Build Coastguard Worker cbs = (UInt32)pos;
435*f6dc9357SAndroid Build Coastguard Worker }
436*f6dc9357SAndroid Build Coastguard Worker #endif
437*f6dc9357SAndroid Build Coastguard Worker
438*f6dc9357SAndroid Build Coastguard Worker if (delta >= cbs)
439*f6dc9357SAndroid Build Coastguard Worker {
440*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
441*f6dc9357SAndroid Build Coastguard Worker *d++ = 0;
442*f6dc9357SAndroid Build Coastguard Worker ptr1[0] = kEmptyHashValue;
443*f6dc9357SAndroid Build Coastguard Worker ptr1[1] = kEmptyHashValue;
444*f6dc9357SAndroid Build Coastguard Worker }
445*f6dc9357SAndroid Build Coastguard Worker else
446*f6dc9357SAndroid Build Coastguard Worker {
447*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
448*f6dc9357SAndroid Build Coastguard Worker CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
449*f6dc9357SAndroid Build Coastguard Worker UInt32 *_distances = ++d;
450*f6dc9357SAndroid Build Coastguard Worker uint32plus len0 = 0, len1 = 0;
451*f6dc9357SAndroid Build Coastguard Worker UInt32 cutValue = _cutValue;
452*f6dc9357SAndroid Build Coastguard Worker uint32plus maxLen = _maxLen;
453*f6dc9357SAndroid Build Coastguard Worker // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
454*f6dc9357SAndroid Build Coastguard Worker
455*f6dc9357SAndroid Build Coastguard Worker for (LOG_ITER(g_NumIters_Tree++);;)
456*f6dc9357SAndroid Build Coastguard Worker {
457*f6dc9357SAndroid Build Coastguard Worker LOG_ITER(g_NumIters_Loop++);
458*f6dc9357SAndroid Build Coastguard Worker {
459*f6dc9357SAndroid Build Coastguard Worker // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
460*f6dc9357SAndroid Build Coastguard Worker CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
461*f6dc9357SAndroid Build Coastguard Worker + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
462*f6dc9357SAndroid Build Coastguard Worker ) << 1);
463*f6dc9357SAndroid Build Coastguard Worker const Byte *pb = cur - delta;
464*f6dc9357SAndroid Build Coastguard Worker uint32plus len = (len0 < len1 ? len0 : len1);
465*f6dc9357SAndroid Build Coastguard Worker
466*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_SON_PREFETCH
467*f6dc9357SAndroid Build Coastguard Worker const UInt32 pair0 = *pair;
468*f6dc9357SAndroid Build Coastguard Worker #endif
469*f6dc9357SAndroid Build Coastguard Worker
470*f6dc9357SAndroid Build Coastguard Worker if (pb[len] == cur[len])
471*f6dc9357SAndroid Build Coastguard Worker {
472*f6dc9357SAndroid Build Coastguard Worker if (++len != lenLimit && pb[len] == cur[len])
473*f6dc9357SAndroid Build Coastguard Worker while (++len != lenLimit)
474*f6dc9357SAndroid Build Coastguard Worker if (pb[len] != cur[len])
475*f6dc9357SAndroid Build Coastguard Worker break;
476*f6dc9357SAndroid Build Coastguard Worker if (maxLen < len)
477*f6dc9357SAndroid Build Coastguard Worker {
478*f6dc9357SAndroid Build Coastguard Worker maxLen = len;
479*f6dc9357SAndroid Build Coastguard Worker *d++ = (UInt32)len;
480*f6dc9357SAndroid Build Coastguard Worker *d++ = delta - 1;
481*f6dc9357SAndroid Build Coastguard Worker if (len == lenLimit)
482*f6dc9357SAndroid Build Coastguard Worker {
483*f6dc9357SAndroid Build Coastguard Worker {
484*f6dc9357SAndroid Build Coastguard Worker const UInt32 pair1 = pair[1];
485*f6dc9357SAndroid Build Coastguard Worker *ptr0 = pair1;
486*f6dc9357SAndroid Build Coastguard Worker *ptr1 =
487*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_SON_PREFETCH
488*f6dc9357SAndroid Build Coastguard Worker pair0;
489*f6dc9357SAndroid Build Coastguard Worker #else
490*f6dc9357SAndroid Build Coastguard Worker pair[0];
491*f6dc9357SAndroid Build Coastguard Worker #endif
492*f6dc9357SAndroid Build Coastguard Worker }
493*f6dc9357SAndroid Build Coastguard Worker
494*f6dc9357SAndroid Build Coastguard Worker _distances[-1] = (UInt32)(d - _distances);
495*f6dc9357SAndroid Build Coastguard Worker
496*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_LONG_MATCH_OPT
497*f6dc9357SAndroid Build Coastguard Worker
498*f6dc9357SAndroid Build Coastguard Worker if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
499*f6dc9357SAndroid Build Coastguard Worker break;
500*f6dc9357SAndroid Build Coastguard Worker
501*f6dc9357SAndroid Build Coastguard Worker {
502*f6dc9357SAndroid Build Coastguard Worker const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
503*f6dc9357SAndroid Build Coastguard Worker for (;;)
504*f6dc9357SAndroid Build Coastguard Worker {
505*f6dc9357SAndroid Build Coastguard Worker *d++ = 2;
506*f6dc9357SAndroid Build Coastguard Worker *d++ = (UInt32)lenLimit;
507*f6dc9357SAndroid Build Coastguard Worker *d++ = delta - 1;
508*f6dc9357SAndroid Build Coastguard Worker _cyclicBufferPos++;
509*f6dc9357SAndroid Build Coastguard Worker {
510*f6dc9357SAndroid Build Coastguard Worker CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
511*f6dc9357SAndroid Build Coastguard Worker const CLzRef *src = dest + ((diff +
512*f6dc9357SAndroid Build Coastguard Worker (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
513*f6dc9357SAndroid Build Coastguard Worker #if 0
514*f6dc9357SAndroid Build Coastguard Worker *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
515*f6dc9357SAndroid Build Coastguard Worker #else
516*f6dc9357SAndroid Build Coastguard Worker const UInt32 p0 = src[0];
517*f6dc9357SAndroid Build Coastguard Worker const UInt32 p1 = src[1];
518*f6dc9357SAndroid Build Coastguard Worker dest[0] = p0;
519*f6dc9357SAndroid Build Coastguard Worker dest[1] = p1;
520*f6dc9357SAndroid Build Coastguard Worker #endif
521*f6dc9357SAndroid Build Coastguard Worker }
522*f6dc9357SAndroid Build Coastguard Worker hash++;
523*f6dc9357SAndroid Build Coastguard Worker pos++;
524*f6dc9357SAndroid Build Coastguard Worker cur++;
525*f6dc9357SAndroid Build Coastguard Worker pb++;
526*f6dc9357SAndroid Build Coastguard Worker if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
527*f6dc9357SAndroid Build Coastguard Worker break;
528*f6dc9357SAndroid Build Coastguard Worker }
529*f6dc9357SAndroid Build Coastguard Worker }
530*f6dc9357SAndroid Build Coastguard Worker #endif
531*f6dc9357SAndroid Build Coastguard Worker
532*f6dc9357SAndroid Build Coastguard Worker break;
533*f6dc9357SAndroid Build Coastguard Worker }
534*f6dc9357SAndroid Build Coastguard Worker }
535*f6dc9357SAndroid Build Coastguard Worker }
536*f6dc9357SAndroid Build Coastguard Worker {
537*f6dc9357SAndroid Build Coastguard Worker const UInt32 curMatch = (UInt32)pos - delta;
538*f6dc9357SAndroid Build Coastguard Worker if (pb[len] < cur[len])
539*f6dc9357SAndroid Build Coastguard Worker {
540*f6dc9357SAndroid Build Coastguard Worker delta = pair[1];
541*f6dc9357SAndroid Build Coastguard Worker *ptr1 = curMatch;
542*f6dc9357SAndroid Build Coastguard Worker ptr1 = pair + 1;
543*f6dc9357SAndroid Build Coastguard Worker len1 = len;
544*f6dc9357SAndroid Build Coastguard Worker }
545*f6dc9357SAndroid Build Coastguard Worker else
546*f6dc9357SAndroid Build Coastguard Worker {
547*f6dc9357SAndroid Build Coastguard Worker delta = *pair;
548*f6dc9357SAndroid Build Coastguard Worker *ptr0 = curMatch;
549*f6dc9357SAndroid Build Coastguard Worker ptr0 = pair;
550*f6dc9357SAndroid Build Coastguard Worker len0 = len;
551*f6dc9357SAndroid Build Coastguard Worker }
552*f6dc9357SAndroid Build Coastguard Worker
553*f6dc9357SAndroid Build Coastguard Worker {
554*f6dc9357SAndroid Build Coastguard Worker if (delta >= curMatch)
555*f6dc9357SAndroid Build Coastguard Worker return NULL;
556*f6dc9357SAndroid Build Coastguard Worker delta = (UInt32)pos - delta;
557*f6dc9357SAndroid Build Coastguard Worker if (delta >= cbs
558*f6dc9357SAndroid Build Coastguard Worker // delta >= _cyclicBufferSize || delta >= pos
559*f6dc9357SAndroid Build Coastguard Worker || --cutValue == 0)
560*f6dc9357SAndroid Build Coastguard Worker {
561*f6dc9357SAndroid Build Coastguard Worker *ptr0 = *ptr1 = kEmptyHashValue;
562*f6dc9357SAndroid Build Coastguard Worker _distances[-1] = (UInt32)(d - _distances);
563*f6dc9357SAndroid Build Coastguard Worker break;
564*f6dc9357SAndroid Build Coastguard Worker }
565*f6dc9357SAndroid Build Coastguard Worker }
566*f6dc9357SAndroid Build Coastguard Worker }
567*f6dc9357SAndroid Build Coastguard Worker }
568*f6dc9357SAndroid Build Coastguard Worker } // for (tree iterations)
569*f6dc9357SAndroid Build Coastguard Worker }
570*f6dc9357SAndroid Build Coastguard Worker pos++;
571*f6dc9357SAndroid Build Coastguard Worker _cyclicBufferPos++;
572*f6dc9357SAndroid Build Coastguard Worker cur++;
573*f6dc9357SAndroid Build Coastguard Worker }
574*f6dc9357SAndroid Build Coastguard Worker while (d < limit);
575*f6dc9357SAndroid Build Coastguard Worker *posRes = (UInt32)pos;
576*f6dc9357SAndroid Build Coastguard Worker return d;
577*f6dc9357SAndroid Build Coastguard Worker }
578*f6dc9357SAndroid Build Coastguard Worker */
579