1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
11*01826a49SYabin Cui #include "zstd_compress_internal.h"
12*01826a49SYabin Cui #include "hist.h"
13*01826a49SYabin Cui #include "zstd_opt.h"
14*01826a49SYabin Cui
15*01826a49SYabin Cui #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16*01826a49SYabin Cui || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17*01826a49SYabin Cui || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
18*01826a49SYabin Cui
19*01826a49SYabin Cui #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20*01826a49SYabin Cui #define ZSTD_MAX_PRICE (1<<30)
21*01826a49SYabin Cui
22*01826a49SYabin Cui #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23*01826a49SYabin Cui
24*01826a49SYabin Cui
25*01826a49SYabin Cui /*-*************************************
26*01826a49SYabin Cui * Price functions for optimal parser
27*01826a49SYabin Cui ***************************************/
28*01826a49SYabin Cui
29*01826a49SYabin Cui #if 0 /* approximation at bit level (for tests) */
30*01826a49SYabin Cui # define BITCOST_ACCURACY 0
31*01826a49SYabin Cui # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32*01826a49SYabin Cui # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
33*01826a49SYabin Cui #elif 0 /* fractional bit accuracy (for tests) */
34*01826a49SYabin Cui # define BITCOST_ACCURACY 8
35*01826a49SYabin Cui # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36*01826a49SYabin Cui # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
37*01826a49SYabin Cui #else /* opt==approx, ultra==accurate */
38*01826a49SYabin Cui # define BITCOST_ACCURACY 8
39*01826a49SYabin Cui # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
40*01826a49SYabin Cui # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41*01826a49SYabin Cui #endif
42*01826a49SYabin Cui
43*01826a49SYabin Cui /* ZSTD_bitWeight() :
44*01826a49SYabin Cui * provide estimated "cost" of a stat in full bits only */
ZSTD_bitWeight(U32 stat)45*01826a49SYabin Cui MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
46*01826a49SYabin Cui {
47*01826a49SYabin Cui return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
48*01826a49SYabin Cui }
49*01826a49SYabin Cui
50*01826a49SYabin Cui /* ZSTD_fracWeight() :
51*01826a49SYabin Cui * provide fractional-bit "cost" of a stat,
52*01826a49SYabin Cui * using linear interpolation approximation */
ZSTD_fracWeight(U32 rawStat)53*01826a49SYabin Cui MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
54*01826a49SYabin Cui {
55*01826a49SYabin Cui U32 const stat = rawStat + 1;
56*01826a49SYabin Cui U32 const hb = ZSTD_highbit32(stat);
57*01826a49SYabin Cui U32 const BWeight = hb * BITCOST_MULTIPLIER;
58*01826a49SYabin Cui /* Fweight was meant for "Fractional weight"
59*01826a49SYabin Cui * but it's effectively a value between 1 and 2
60*01826a49SYabin Cui * using fixed point arithmetic */
61*01826a49SYabin Cui U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
62*01826a49SYabin Cui U32 const weight = BWeight + FWeight;
63*01826a49SYabin Cui assert(hb + BITCOST_ACCURACY < 31);
64*01826a49SYabin Cui return weight;
65*01826a49SYabin Cui }
66*01826a49SYabin Cui
67*01826a49SYabin Cui #if (DEBUGLEVEL>=2)
68*01826a49SYabin Cui /* debugging function,
69*01826a49SYabin Cui * @return price in bytes as fractional value
70*01826a49SYabin Cui * for debug messages only */
ZSTD_fCost(int price)71*01826a49SYabin Cui MEM_STATIC double ZSTD_fCost(int price)
72*01826a49SYabin Cui {
73*01826a49SYabin Cui return (double)price / (BITCOST_MULTIPLIER*8);
74*01826a49SYabin Cui }
75*01826a49SYabin Cui #endif
76*01826a49SYabin Cui
ZSTD_compressedLiterals(optState_t const * const optPtr)77*01826a49SYabin Cui static int ZSTD_compressedLiterals(optState_t const* const optPtr)
78*01826a49SYabin Cui {
79*01826a49SYabin Cui return optPtr->literalCompressionMode != ZSTD_ps_disable;
80*01826a49SYabin Cui }
81*01826a49SYabin Cui
ZSTD_setBasePrices(optState_t * optPtr,int optLevel)82*01826a49SYabin Cui static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
83*01826a49SYabin Cui {
84*01826a49SYabin Cui if (ZSTD_compressedLiterals(optPtr))
85*01826a49SYabin Cui optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
86*01826a49SYabin Cui optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
87*01826a49SYabin Cui optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
88*01826a49SYabin Cui optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
89*01826a49SYabin Cui }
90*01826a49SYabin Cui
91*01826a49SYabin Cui
sum_u32(const unsigned table[],size_t nbElts)92*01826a49SYabin Cui static U32 sum_u32(const unsigned table[], size_t nbElts)
93*01826a49SYabin Cui {
94*01826a49SYabin Cui size_t n;
95*01826a49SYabin Cui U32 total = 0;
96*01826a49SYabin Cui for (n=0; n<nbElts; n++) {
97*01826a49SYabin Cui total += table[n];
98*01826a49SYabin Cui }
99*01826a49SYabin Cui return total;
100*01826a49SYabin Cui }
101*01826a49SYabin Cui
102*01826a49SYabin Cui typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103*01826a49SYabin Cui
104*01826a49SYabin Cui static U32
ZSTD_downscaleStats(unsigned * table,U32 lastEltIndex,U32 shift,base_directive_e base1)105*01826a49SYabin Cui ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
106*01826a49SYabin Cui {
107*01826a49SYabin Cui U32 s, sum=0;
108*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109*01826a49SYabin Cui (unsigned)lastEltIndex+1, (unsigned)shift );
110*01826a49SYabin Cui assert(shift < 30);
111*01826a49SYabin Cui for (s=0; s<lastEltIndex+1; s++) {
112*01826a49SYabin Cui unsigned const base = base1 ? 1 : (table[s]>0);
113*01826a49SYabin Cui unsigned const newStat = base + (table[s] >> shift);
114*01826a49SYabin Cui sum += newStat;
115*01826a49SYabin Cui table[s] = newStat;
116*01826a49SYabin Cui }
117*01826a49SYabin Cui return sum;
118*01826a49SYabin Cui }
119*01826a49SYabin Cui
120*01826a49SYabin Cui /* ZSTD_scaleStats() :
121*01826a49SYabin Cui * reduce all elt frequencies in table if sum too large
122*01826a49SYabin Cui * return the resulting sum of elements */
ZSTD_scaleStats(unsigned * table,U32 lastEltIndex,U32 logTarget)123*01826a49SYabin Cui static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
124*01826a49SYabin Cui {
125*01826a49SYabin Cui U32 const prevsum = sum_u32(table, lastEltIndex+1);
126*01826a49SYabin Cui U32 const factor = prevsum >> logTarget;
127*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
128*01826a49SYabin Cui assert(logTarget < 30);
129*01826a49SYabin Cui if (factor <= 1) return prevsum;
130*01826a49SYabin Cui return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
131*01826a49SYabin Cui }
132*01826a49SYabin Cui
133*01826a49SYabin Cui /* ZSTD_rescaleFreqs() :
134*01826a49SYabin Cui * if first block (detected by optPtr->litLengthSum == 0) : init statistics
135*01826a49SYabin Cui * take hints from dictionary if there is one
136*01826a49SYabin Cui * and init from zero if there is none,
137*01826a49SYabin Cui * using src for literals stats, and baseline stats for sequence symbols
138*01826a49SYabin Cui * otherwise downscale existing stats, to be used as seed for next block.
139*01826a49SYabin Cui */
140*01826a49SYabin Cui static void
ZSTD_rescaleFreqs(optState_t * const optPtr,const BYTE * const src,size_t const srcSize,int const optLevel)141*01826a49SYabin Cui ZSTD_rescaleFreqs(optState_t* const optPtr,
142*01826a49SYabin Cui const BYTE* const src, size_t const srcSize,
143*01826a49SYabin Cui int const optLevel)
144*01826a49SYabin Cui {
145*01826a49SYabin Cui int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
146*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
147*01826a49SYabin Cui optPtr->priceType = zop_dynamic;
148*01826a49SYabin Cui
149*01826a49SYabin Cui if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
150*01826a49SYabin Cui
151*01826a49SYabin Cui /* heuristic: use pre-defined stats for too small inputs */
152*01826a49SYabin Cui if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153*01826a49SYabin Cui DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
154*01826a49SYabin Cui optPtr->priceType = zop_predef;
155*01826a49SYabin Cui }
156*01826a49SYabin Cui
157*01826a49SYabin Cui assert(optPtr->symbolCosts != NULL);
158*01826a49SYabin Cui if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
159*01826a49SYabin Cui
160*01826a49SYabin Cui /* huffman stats covering the full value set : table presumed generated by dictionary */
161*01826a49SYabin Cui optPtr->priceType = zop_dynamic;
162*01826a49SYabin Cui
163*01826a49SYabin Cui if (compressedLiterals) {
164*01826a49SYabin Cui /* generate literals statistics from huffman table */
165*01826a49SYabin Cui unsigned lit;
166*01826a49SYabin Cui assert(optPtr->litFreq != NULL);
167*01826a49SYabin Cui optPtr->litSum = 0;
168*01826a49SYabin Cui for (lit=0; lit<=MaxLit; lit++) {
169*01826a49SYabin Cui U32 const scaleLog = 11; /* scale to 2K */
170*01826a49SYabin Cui U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
171*01826a49SYabin Cui assert(bitCost <= scaleLog);
172*01826a49SYabin Cui optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
173*01826a49SYabin Cui optPtr->litSum += optPtr->litFreq[lit];
174*01826a49SYabin Cui } }
175*01826a49SYabin Cui
176*01826a49SYabin Cui { unsigned ll;
177*01826a49SYabin Cui FSE_CState_t llstate;
178*01826a49SYabin Cui FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
179*01826a49SYabin Cui optPtr->litLengthSum = 0;
180*01826a49SYabin Cui for (ll=0; ll<=MaxLL; ll++) {
181*01826a49SYabin Cui U32 const scaleLog = 10; /* scale to 1K */
182*01826a49SYabin Cui U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
183*01826a49SYabin Cui assert(bitCost < scaleLog);
184*01826a49SYabin Cui optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
185*01826a49SYabin Cui optPtr->litLengthSum += optPtr->litLengthFreq[ll];
186*01826a49SYabin Cui } }
187*01826a49SYabin Cui
188*01826a49SYabin Cui { unsigned ml;
189*01826a49SYabin Cui FSE_CState_t mlstate;
190*01826a49SYabin Cui FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
191*01826a49SYabin Cui optPtr->matchLengthSum = 0;
192*01826a49SYabin Cui for (ml=0; ml<=MaxML; ml++) {
193*01826a49SYabin Cui U32 const scaleLog = 10;
194*01826a49SYabin Cui U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
195*01826a49SYabin Cui assert(bitCost < scaleLog);
196*01826a49SYabin Cui optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
197*01826a49SYabin Cui optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
198*01826a49SYabin Cui } }
199*01826a49SYabin Cui
200*01826a49SYabin Cui { unsigned of;
201*01826a49SYabin Cui FSE_CState_t ofstate;
202*01826a49SYabin Cui FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
203*01826a49SYabin Cui optPtr->offCodeSum = 0;
204*01826a49SYabin Cui for (of=0; of<=MaxOff; of++) {
205*01826a49SYabin Cui U32 const scaleLog = 10;
206*01826a49SYabin Cui U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
207*01826a49SYabin Cui assert(bitCost < scaleLog);
208*01826a49SYabin Cui optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
209*01826a49SYabin Cui optPtr->offCodeSum += optPtr->offCodeFreq[of];
210*01826a49SYabin Cui } }
211*01826a49SYabin Cui
212*01826a49SYabin Cui } else { /* first block, no dictionary */
213*01826a49SYabin Cui
214*01826a49SYabin Cui assert(optPtr->litFreq != NULL);
215*01826a49SYabin Cui if (compressedLiterals) {
216*01826a49SYabin Cui /* base initial cost of literals on direct frequency within src */
217*01826a49SYabin Cui unsigned lit = MaxLit;
218*01826a49SYabin Cui HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
219*01826a49SYabin Cui optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
220*01826a49SYabin Cui }
221*01826a49SYabin Cui
222*01826a49SYabin Cui { unsigned const baseLLfreqs[MaxLL+1] = {
223*01826a49SYabin Cui 4, 2, 1, 1, 1, 1, 1, 1,
224*01826a49SYabin Cui 1, 1, 1, 1, 1, 1, 1, 1,
225*01826a49SYabin Cui 1, 1, 1, 1, 1, 1, 1, 1,
226*01826a49SYabin Cui 1, 1, 1, 1, 1, 1, 1, 1,
227*01826a49SYabin Cui 1, 1, 1, 1
228*01826a49SYabin Cui };
229*01826a49SYabin Cui ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
230*01826a49SYabin Cui optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
231*01826a49SYabin Cui }
232*01826a49SYabin Cui
233*01826a49SYabin Cui { unsigned ml;
234*01826a49SYabin Cui for (ml=0; ml<=MaxML; ml++)
235*01826a49SYabin Cui optPtr->matchLengthFreq[ml] = 1;
236*01826a49SYabin Cui }
237*01826a49SYabin Cui optPtr->matchLengthSum = MaxML+1;
238*01826a49SYabin Cui
239*01826a49SYabin Cui { unsigned const baseOFCfreqs[MaxOff+1] = {
240*01826a49SYabin Cui 6, 2, 1, 1, 2, 3, 4, 4,
241*01826a49SYabin Cui 4, 3, 2, 1, 1, 1, 1, 1,
242*01826a49SYabin Cui 1, 1, 1, 1, 1, 1, 1, 1,
243*01826a49SYabin Cui 1, 1, 1, 1, 1, 1, 1, 1
244*01826a49SYabin Cui };
245*01826a49SYabin Cui ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
246*01826a49SYabin Cui optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
247*01826a49SYabin Cui }
248*01826a49SYabin Cui
249*01826a49SYabin Cui }
250*01826a49SYabin Cui
251*01826a49SYabin Cui } else { /* new block : scale down accumulated statistics */
252*01826a49SYabin Cui
253*01826a49SYabin Cui if (compressedLiterals)
254*01826a49SYabin Cui optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
255*01826a49SYabin Cui optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
256*01826a49SYabin Cui optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
257*01826a49SYabin Cui optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
258*01826a49SYabin Cui }
259*01826a49SYabin Cui
260*01826a49SYabin Cui ZSTD_setBasePrices(optPtr, optLevel);
261*01826a49SYabin Cui }
262*01826a49SYabin Cui
263*01826a49SYabin Cui /* ZSTD_rawLiteralsCost() :
264*01826a49SYabin Cui * price of literals (only) in specified segment (which length can be 0).
265*01826a49SYabin Cui * does not include price of literalLength symbol */
ZSTD_rawLiteralsCost(const BYTE * const literals,U32 const litLength,const optState_t * const optPtr,int optLevel)266*01826a49SYabin Cui static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
267*01826a49SYabin Cui const optState_t* const optPtr,
268*01826a49SYabin Cui int optLevel)
269*01826a49SYabin Cui {
270*01826a49SYabin Cui DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
271*01826a49SYabin Cui if (litLength == 0) return 0;
272*01826a49SYabin Cui
273*01826a49SYabin Cui if (!ZSTD_compressedLiterals(optPtr))
274*01826a49SYabin Cui return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
275*01826a49SYabin Cui
276*01826a49SYabin Cui if (optPtr->priceType == zop_predef)
277*01826a49SYabin Cui return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
278*01826a49SYabin Cui
279*01826a49SYabin Cui /* dynamic statistics */
280*01826a49SYabin Cui { U32 price = optPtr->litSumBasePrice * litLength;
281*01826a49SYabin Cui U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
282*01826a49SYabin Cui U32 u;
283*01826a49SYabin Cui assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
284*01826a49SYabin Cui for (u=0; u < litLength; u++) {
285*01826a49SYabin Cui U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286*01826a49SYabin Cui if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287*01826a49SYabin Cui price -= litPrice;
288*01826a49SYabin Cui }
289*01826a49SYabin Cui return price;
290*01826a49SYabin Cui }
291*01826a49SYabin Cui }
292*01826a49SYabin Cui
293*01826a49SYabin Cui /* ZSTD_litLengthPrice() :
294*01826a49SYabin Cui * cost of literalLength symbol */
ZSTD_litLengthPrice(U32 const litLength,const optState_t * const optPtr,int optLevel)295*01826a49SYabin Cui static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
296*01826a49SYabin Cui {
297*01826a49SYabin Cui assert(litLength <= ZSTD_BLOCKSIZE_MAX);
298*01826a49SYabin Cui if (optPtr->priceType == zop_predef)
299*01826a49SYabin Cui return WEIGHT(litLength, optLevel);
300*01826a49SYabin Cui
301*01826a49SYabin Cui /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302*01826a49SYabin Cui * because it isn't representable in the zstd format.
303*01826a49SYabin Cui * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304*01826a49SYabin Cui * In such a case, the block would be all literals.
305*01826a49SYabin Cui */
306*01826a49SYabin Cui if (litLength == ZSTD_BLOCKSIZE_MAX)
307*01826a49SYabin Cui return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
308*01826a49SYabin Cui
309*01826a49SYabin Cui /* dynamic statistics */
310*01826a49SYabin Cui { U32 const llCode = ZSTD_LLcode(litLength);
311*01826a49SYabin Cui return (LL_bits[llCode] * BITCOST_MULTIPLIER)
312*01826a49SYabin Cui + optPtr->litLengthSumBasePrice
313*01826a49SYabin Cui - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
314*01826a49SYabin Cui }
315*01826a49SYabin Cui }
316*01826a49SYabin Cui
317*01826a49SYabin Cui /* ZSTD_getMatchPrice() :
318*01826a49SYabin Cui * Provides the cost of the match part (offset + matchLength) of a sequence.
319*01826a49SYabin Cui * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
320*01826a49SYabin Cui * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
321*01826a49SYabin Cui * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
322*01826a49SYabin Cui */
323*01826a49SYabin Cui FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,U32 const matchLength,const optState_t * const optPtr,int const optLevel)324*01826a49SYabin Cui ZSTD_getMatchPrice(U32 const offBase,
325*01826a49SYabin Cui U32 const matchLength,
326*01826a49SYabin Cui const optState_t* const optPtr,
327*01826a49SYabin Cui int const optLevel)
328*01826a49SYabin Cui {
329*01826a49SYabin Cui U32 price;
330*01826a49SYabin Cui U32 const offCode = ZSTD_highbit32(offBase);
331*01826a49SYabin Cui U32 const mlBase = matchLength - MINMATCH;
332*01826a49SYabin Cui assert(matchLength >= MINMATCH);
333*01826a49SYabin Cui
334*01826a49SYabin Cui if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
335*01826a49SYabin Cui return WEIGHT(mlBase, optLevel)
336*01826a49SYabin Cui + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
337*01826a49SYabin Cui
338*01826a49SYabin Cui /* dynamic statistics */
339*01826a49SYabin Cui price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
340*01826a49SYabin Cui if ((optLevel<2) /*static*/ && offCode >= 20)
341*01826a49SYabin Cui price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
342*01826a49SYabin Cui
343*01826a49SYabin Cui /* match Length */
344*01826a49SYabin Cui { U32 const mlCode = ZSTD_MLcode(mlBase);
345*01826a49SYabin Cui price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
346*01826a49SYabin Cui }
347*01826a49SYabin Cui
348*01826a49SYabin Cui price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
349*01826a49SYabin Cui
350*01826a49SYabin Cui DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
351*01826a49SYabin Cui return price;
352*01826a49SYabin Cui }
353*01826a49SYabin Cui
354*01826a49SYabin Cui /* ZSTD_updateStats() :
355*01826a49SYabin Cui * assumption : literals + litLength <= iend */
ZSTD_updateStats(optState_t * const optPtr,U32 litLength,const BYTE * literals,U32 offBase,U32 matchLength)356*01826a49SYabin Cui static void ZSTD_updateStats(optState_t* const optPtr,
357*01826a49SYabin Cui U32 litLength, const BYTE* literals,
358*01826a49SYabin Cui U32 offBase, U32 matchLength)
359*01826a49SYabin Cui {
360*01826a49SYabin Cui /* literals */
361*01826a49SYabin Cui if (ZSTD_compressedLiterals(optPtr)) {
362*01826a49SYabin Cui U32 u;
363*01826a49SYabin Cui for (u=0; u < litLength; u++)
364*01826a49SYabin Cui optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
365*01826a49SYabin Cui optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
366*01826a49SYabin Cui }
367*01826a49SYabin Cui
368*01826a49SYabin Cui /* literal Length */
369*01826a49SYabin Cui { U32 const llCode = ZSTD_LLcode(litLength);
370*01826a49SYabin Cui optPtr->litLengthFreq[llCode]++;
371*01826a49SYabin Cui optPtr->litLengthSum++;
372*01826a49SYabin Cui }
373*01826a49SYabin Cui
374*01826a49SYabin Cui /* offset code : follows storeSeq() numeric representation */
375*01826a49SYabin Cui { U32 const offCode = ZSTD_highbit32(offBase);
376*01826a49SYabin Cui assert(offCode <= MaxOff);
377*01826a49SYabin Cui optPtr->offCodeFreq[offCode]++;
378*01826a49SYabin Cui optPtr->offCodeSum++;
379*01826a49SYabin Cui }
380*01826a49SYabin Cui
381*01826a49SYabin Cui /* match Length */
382*01826a49SYabin Cui { U32 const mlBase = matchLength - MINMATCH;
383*01826a49SYabin Cui U32 const mlCode = ZSTD_MLcode(mlBase);
384*01826a49SYabin Cui optPtr->matchLengthFreq[mlCode]++;
385*01826a49SYabin Cui optPtr->matchLengthSum++;
386*01826a49SYabin Cui }
387*01826a49SYabin Cui }
388*01826a49SYabin Cui
389*01826a49SYabin Cui
390*01826a49SYabin Cui /* ZSTD_readMINMATCH() :
391*01826a49SYabin Cui * function safe only for comparisons
392*01826a49SYabin Cui * assumption : memPtr must be at least 4 bytes before end of buffer */
ZSTD_readMINMATCH(const void * memPtr,U32 length)393*01826a49SYabin Cui MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
394*01826a49SYabin Cui {
395*01826a49SYabin Cui switch (length)
396*01826a49SYabin Cui {
397*01826a49SYabin Cui default :
398*01826a49SYabin Cui case 4 : return MEM_read32(memPtr);
399*01826a49SYabin Cui case 3 : if (MEM_isLittleEndian())
400*01826a49SYabin Cui return MEM_read32(memPtr)<<8;
401*01826a49SYabin Cui else
402*01826a49SYabin Cui return MEM_read32(memPtr)>>8;
403*01826a49SYabin Cui }
404*01826a49SYabin Cui }
405*01826a49SYabin Cui
406*01826a49SYabin Cui
407*01826a49SYabin Cui /* Update hashTable3 up to ip (excluded)
408*01826a49SYabin Cui Assumption : always within prefix (i.e. not within extDict) */
409*01826a49SYabin Cui static
410*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_insertAndFindFirstIndexHash3(const ZSTD_matchState_t * ms,U32 * nextToUpdate3,const BYTE * const ip)411*01826a49SYabin Cui U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
412*01826a49SYabin Cui U32* nextToUpdate3,
413*01826a49SYabin Cui const BYTE* const ip)
414*01826a49SYabin Cui {
415*01826a49SYabin Cui U32* const hashTable3 = ms->hashTable3;
416*01826a49SYabin Cui U32 const hashLog3 = ms->hashLog3;
417*01826a49SYabin Cui const BYTE* const base = ms->window.base;
418*01826a49SYabin Cui U32 idx = *nextToUpdate3;
419*01826a49SYabin Cui U32 const target = (U32)(ip - base);
420*01826a49SYabin Cui size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
421*01826a49SYabin Cui assert(hashLog3 > 0);
422*01826a49SYabin Cui
423*01826a49SYabin Cui while(idx < target) {
424*01826a49SYabin Cui hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
425*01826a49SYabin Cui idx++;
426*01826a49SYabin Cui }
427*01826a49SYabin Cui
428*01826a49SYabin Cui *nextToUpdate3 = target;
429*01826a49SYabin Cui return hashTable3[hash3];
430*01826a49SYabin Cui }
431*01826a49SYabin Cui
432*01826a49SYabin Cui
433*01826a49SYabin Cui /*-*************************************
434*01826a49SYabin Cui * Binary Tree search
435*01826a49SYabin Cui ***************************************/
436*01826a49SYabin Cui /** ZSTD_insertBt1() : add one or multiple positions to tree.
437*01826a49SYabin Cui * @param ip assumed <= iend-8 .
438*01826a49SYabin Cui * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
439*01826a49SYabin Cui * @return : nb of positions added */
440*01826a49SYabin Cui static
441*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_insertBt1(const ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,U32 const target,U32 const mls,const int extDict)442*01826a49SYabin Cui U32 ZSTD_insertBt1(
443*01826a49SYabin Cui const ZSTD_matchState_t* ms,
444*01826a49SYabin Cui const BYTE* const ip, const BYTE* const iend,
445*01826a49SYabin Cui U32 const target,
446*01826a49SYabin Cui U32 const mls, const int extDict)
447*01826a49SYabin Cui {
448*01826a49SYabin Cui const ZSTD_compressionParameters* const cParams = &ms->cParams;
449*01826a49SYabin Cui U32* const hashTable = ms->hashTable;
450*01826a49SYabin Cui U32 const hashLog = cParams->hashLog;
451*01826a49SYabin Cui size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
452*01826a49SYabin Cui U32* const bt = ms->chainTable;
453*01826a49SYabin Cui U32 const btLog = cParams->chainLog - 1;
454*01826a49SYabin Cui U32 const btMask = (1 << btLog) - 1;
455*01826a49SYabin Cui U32 matchIndex = hashTable[h];
456*01826a49SYabin Cui size_t commonLengthSmaller=0, commonLengthLarger=0;
457*01826a49SYabin Cui const BYTE* const base = ms->window.base;
458*01826a49SYabin Cui const BYTE* const dictBase = ms->window.dictBase;
459*01826a49SYabin Cui const U32 dictLimit = ms->window.dictLimit;
460*01826a49SYabin Cui const BYTE* const dictEnd = dictBase + dictLimit;
461*01826a49SYabin Cui const BYTE* const prefixStart = base + dictLimit;
462*01826a49SYabin Cui const BYTE* match;
463*01826a49SYabin Cui const U32 curr = (U32)(ip-base);
464*01826a49SYabin Cui const U32 btLow = btMask >= curr ? 0 : curr - btMask;
465*01826a49SYabin Cui U32* smallerPtr = bt + 2*(curr&btMask);
466*01826a49SYabin Cui U32* largerPtr = smallerPtr + 1;
467*01826a49SYabin Cui U32 dummy32; /* to be nullified at the end */
468*01826a49SYabin Cui /* windowLow is based on target because
469*01826a49SYabin Cui * we only need positions that will be in the window at the end of the tree update.
470*01826a49SYabin Cui */
471*01826a49SYabin Cui U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
472*01826a49SYabin Cui U32 matchEndIdx = curr+8+1;
473*01826a49SYabin Cui size_t bestLength = 8;
474*01826a49SYabin Cui U32 nbCompares = 1U << cParams->searchLog;
475*01826a49SYabin Cui #ifdef ZSTD_C_PREDICT
476*01826a49SYabin Cui U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
477*01826a49SYabin Cui U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
478*01826a49SYabin Cui predictedSmall += (predictedSmall>0);
479*01826a49SYabin Cui predictedLarge += (predictedLarge>0);
480*01826a49SYabin Cui #endif /* ZSTD_C_PREDICT */
481*01826a49SYabin Cui
482*01826a49SYabin Cui DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
483*01826a49SYabin Cui
484*01826a49SYabin Cui assert(curr <= target);
485*01826a49SYabin Cui assert(ip <= iend-8); /* required for h calculation */
486*01826a49SYabin Cui hashTable[h] = curr; /* Update Hash Table */
487*01826a49SYabin Cui
488*01826a49SYabin Cui assert(windowLow > 0);
489*01826a49SYabin Cui for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
490*01826a49SYabin Cui U32* const nextPtr = bt + 2*(matchIndex & btMask);
491*01826a49SYabin Cui size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
492*01826a49SYabin Cui assert(matchIndex < curr);
493*01826a49SYabin Cui
494*01826a49SYabin Cui #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
495*01826a49SYabin Cui const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
496*01826a49SYabin Cui if (matchIndex == predictedSmall) {
497*01826a49SYabin Cui /* no need to check length, result known */
498*01826a49SYabin Cui *smallerPtr = matchIndex;
499*01826a49SYabin Cui if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
500*01826a49SYabin Cui smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
501*01826a49SYabin Cui matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
502*01826a49SYabin Cui predictedSmall = predictPtr[1] + (predictPtr[1]>0);
503*01826a49SYabin Cui continue;
504*01826a49SYabin Cui }
505*01826a49SYabin Cui if (matchIndex == predictedLarge) {
506*01826a49SYabin Cui *largerPtr = matchIndex;
507*01826a49SYabin Cui if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
508*01826a49SYabin Cui largerPtr = nextPtr;
509*01826a49SYabin Cui matchIndex = nextPtr[0];
510*01826a49SYabin Cui predictedLarge = predictPtr[0] + (predictPtr[0]>0);
511*01826a49SYabin Cui continue;
512*01826a49SYabin Cui }
513*01826a49SYabin Cui #endif
514*01826a49SYabin Cui
515*01826a49SYabin Cui if (!extDict || (matchIndex+matchLength >= dictLimit)) {
516*01826a49SYabin Cui assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
517*01826a49SYabin Cui match = base + matchIndex;
518*01826a49SYabin Cui matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
519*01826a49SYabin Cui } else {
520*01826a49SYabin Cui match = dictBase + matchIndex;
521*01826a49SYabin Cui matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
522*01826a49SYabin Cui if (matchIndex+matchLength >= dictLimit)
523*01826a49SYabin Cui match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
524*01826a49SYabin Cui }
525*01826a49SYabin Cui
526*01826a49SYabin Cui if (matchLength > bestLength) {
527*01826a49SYabin Cui bestLength = matchLength;
528*01826a49SYabin Cui if (matchLength > matchEndIdx - matchIndex)
529*01826a49SYabin Cui matchEndIdx = matchIndex + (U32)matchLength;
530*01826a49SYabin Cui }
531*01826a49SYabin Cui
532*01826a49SYabin Cui if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
533*01826a49SYabin Cui break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
534*01826a49SYabin Cui }
535*01826a49SYabin Cui
536*01826a49SYabin Cui if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
537*01826a49SYabin Cui /* match is smaller than current */
538*01826a49SYabin Cui *smallerPtr = matchIndex; /* update smaller idx */
539*01826a49SYabin Cui commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
540*01826a49SYabin Cui if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
541*01826a49SYabin Cui smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
542*01826a49SYabin Cui matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
543*01826a49SYabin Cui } else {
544*01826a49SYabin Cui /* match is larger than current */
545*01826a49SYabin Cui *largerPtr = matchIndex;
546*01826a49SYabin Cui commonLengthLarger = matchLength;
547*01826a49SYabin Cui if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
548*01826a49SYabin Cui largerPtr = nextPtr;
549*01826a49SYabin Cui matchIndex = nextPtr[0];
550*01826a49SYabin Cui } }
551*01826a49SYabin Cui
552*01826a49SYabin Cui *smallerPtr = *largerPtr = 0;
553*01826a49SYabin Cui { U32 positions = 0;
554*01826a49SYabin Cui if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
555*01826a49SYabin Cui assert(matchEndIdx > curr + 8);
556*01826a49SYabin Cui return MAX(positions, matchEndIdx - (curr + 8));
557*01826a49SYabin Cui }
558*01826a49SYabin Cui }
559*01826a49SYabin Cui
560*01826a49SYabin Cui FORCE_INLINE_TEMPLATE
561*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_updateTree_internal(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,const U32 mls,const ZSTD_dictMode_e dictMode)562*01826a49SYabin Cui void ZSTD_updateTree_internal(
563*01826a49SYabin Cui ZSTD_matchState_t* ms,
564*01826a49SYabin Cui const BYTE* const ip, const BYTE* const iend,
565*01826a49SYabin Cui const U32 mls, const ZSTD_dictMode_e dictMode)
566*01826a49SYabin Cui {
567*01826a49SYabin Cui const BYTE* const base = ms->window.base;
568*01826a49SYabin Cui U32 const target = (U32)(ip - base);
569*01826a49SYabin Cui U32 idx = ms->nextToUpdate;
570*01826a49SYabin Cui DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
571*01826a49SYabin Cui idx, target, dictMode);
572*01826a49SYabin Cui
573*01826a49SYabin Cui while(idx < target) {
574*01826a49SYabin Cui U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
575*01826a49SYabin Cui assert(idx < (U32)(idx + forward));
576*01826a49SYabin Cui idx += forward;
577*01826a49SYabin Cui }
578*01826a49SYabin Cui assert((size_t)(ip - base) <= (size_t)(U32)(-1));
579*01826a49SYabin Cui assert((size_t)(iend - base) <= (size_t)(U32)(-1));
580*01826a49SYabin Cui ms->nextToUpdate = target;
581*01826a49SYabin Cui }
582*01826a49SYabin Cui
ZSTD_updateTree(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * iend)583*01826a49SYabin Cui void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
584*01826a49SYabin Cui ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
585*01826a49SYabin Cui }
586*01826a49SYabin Cui
587*01826a49SYabin Cui FORCE_INLINE_TEMPLATE
588*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589*01826a49SYabin Cui U32
ZSTD_insertBtAndGetAllMatches(ZSTD_match_t * matches,ZSTD_matchState_t * ms,U32 * nextToUpdate3,const BYTE * const ip,const BYTE * const iLimit,const ZSTD_dictMode_e dictMode,const U32 rep[ZSTD_REP_NUM],const U32 ll0,const U32 lengthToBeat,const U32 mls)590*01826a49SYabin Cui ZSTD_insertBtAndGetAllMatches (
591*01826a49SYabin Cui ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
592*01826a49SYabin Cui ZSTD_matchState_t* ms,
593*01826a49SYabin Cui U32* nextToUpdate3,
594*01826a49SYabin Cui const BYTE* const ip, const BYTE* const iLimit,
595*01826a49SYabin Cui const ZSTD_dictMode_e dictMode,
596*01826a49SYabin Cui const U32 rep[ZSTD_REP_NUM],
597*01826a49SYabin Cui const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
598*01826a49SYabin Cui const U32 lengthToBeat,
599*01826a49SYabin Cui const U32 mls /* template */)
600*01826a49SYabin Cui {
601*01826a49SYabin Cui const ZSTD_compressionParameters* const cParams = &ms->cParams;
602*01826a49SYabin Cui U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
603*01826a49SYabin Cui const BYTE* const base = ms->window.base;
604*01826a49SYabin Cui U32 const curr = (U32)(ip-base);
605*01826a49SYabin Cui U32 const hashLog = cParams->hashLog;
606*01826a49SYabin Cui U32 const minMatch = (mls==3) ? 3 : 4;
607*01826a49SYabin Cui U32* const hashTable = ms->hashTable;
608*01826a49SYabin Cui size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
609*01826a49SYabin Cui U32 matchIndex = hashTable[h];
610*01826a49SYabin Cui U32* const bt = ms->chainTable;
611*01826a49SYabin Cui U32 const btLog = cParams->chainLog - 1;
612*01826a49SYabin Cui U32 const btMask= (1U << btLog) - 1;
613*01826a49SYabin Cui size_t commonLengthSmaller=0, commonLengthLarger=0;
614*01826a49SYabin Cui const BYTE* const dictBase = ms->window.dictBase;
615*01826a49SYabin Cui U32 const dictLimit = ms->window.dictLimit;
616*01826a49SYabin Cui const BYTE* const dictEnd = dictBase + dictLimit;
617*01826a49SYabin Cui const BYTE* const prefixStart = base + dictLimit;
618*01826a49SYabin Cui U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
619*01826a49SYabin Cui U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
620*01826a49SYabin Cui U32 const matchLow = windowLow ? windowLow : 1;
621*01826a49SYabin Cui U32* smallerPtr = bt + 2*(curr&btMask);
622*01826a49SYabin Cui U32* largerPtr = bt + 2*(curr&btMask) + 1;
623*01826a49SYabin Cui U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
624*01826a49SYabin Cui U32 dummy32; /* to be nullified at the end */
625*01826a49SYabin Cui U32 mnum = 0;
626*01826a49SYabin Cui U32 nbCompares = 1U << cParams->searchLog;
627*01826a49SYabin Cui
628*01826a49SYabin Cui const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
629*01826a49SYabin Cui const ZSTD_compressionParameters* const dmsCParams =
630*01826a49SYabin Cui dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
631*01826a49SYabin Cui const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
632*01826a49SYabin Cui const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
633*01826a49SYabin Cui U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
634*01826a49SYabin Cui U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
635*01826a49SYabin Cui U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
636*01826a49SYabin Cui U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
637*01826a49SYabin Cui U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
638*01826a49SYabin Cui U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
639*01826a49SYabin Cui U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
640*01826a49SYabin Cui
641*01826a49SYabin Cui size_t bestLength = lengthToBeat-1;
642*01826a49SYabin Cui DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
643*01826a49SYabin Cui
644*01826a49SYabin Cui /* check repCode */
645*01826a49SYabin Cui assert(ll0 <= 1); /* necessarily 1 or 0 */
646*01826a49SYabin Cui { U32 const lastR = ZSTD_REP_NUM + ll0;
647*01826a49SYabin Cui U32 repCode;
648*01826a49SYabin Cui for (repCode = ll0; repCode < lastR; repCode++) {
649*01826a49SYabin Cui U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
650*01826a49SYabin Cui U32 const repIndex = curr - repOffset;
651*01826a49SYabin Cui U32 repLen = 0;
652*01826a49SYabin Cui assert(curr >= dictLimit);
653*01826a49SYabin Cui if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
654*01826a49SYabin Cui /* We must validate the repcode offset because when we're using a dictionary the
655*01826a49SYabin Cui * valid offset range shrinks when the dictionary goes out of bounds.
656*01826a49SYabin Cui */
657*01826a49SYabin Cui if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
658*01826a49SYabin Cui repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
659*01826a49SYabin Cui }
660*01826a49SYabin Cui } else { /* repIndex < dictLimit || repIndex >= curr */
661*01826a49SYabin Cui const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
662*01826a49SYabin Cui dmsBase + repIndex - dmsIndexDelta :
663*01826a49SYabin Cui dictBase + repIndex;
664*01826a49SYabin Cui assert(curr >= windowLow);
665*01826a49SYabin Cui if ( dictMode == ZSTD_extDict
666*01826a49SYabin Cui && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
667*01826a49SYabin Cui & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
668*01826a49SYabin Cui && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
669*01826a49SYabin Cui repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
670*01826a49SYabin Cui }
671*01826a49SYabin Cui if (dictMode == ZSTD_dictMatchState
672*01826a49SYabin Cui && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
673*01826a49SYabin Cui & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
674*01826a49SYabin Cui && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
675*01826a49SYabin Cui repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
676*01826a49SYabin Cui } }
677*01826a49SYabin Cui /* save longer solution */
678*01826a49SYabin Cui if (repLen > bestLength) {
679*01826a49SYabin Cui DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
680*01826a49SYabin Cui repCode, ll0, repOffset, repLen);
681*01826a49SYabin Cui bestLength = repLen;
682*01826a49SYabin Cui matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
683*01826a49SYabin Cui matches[mnum].len = (U32)repLen;
684*01826a49SYabin Cui mnum++;
685*01826a49SYabin Cui if ( (repLen > sufficient_len)
686*01826a49SYabin Cui | (ip+repLen == iLimit) ) { /* best possible */
687*01826a49SYabin Cui return mnum;
688*01826a49SYabin Cui } } } }
689*01826a49SYabin Cui
690*01826a49SYabin Cui /* HC3 match finder */
691*01826a49SYabin Cui if ((mls == 3) /*static*/ && (bestLength < mls)) {
692*01826a49SYabin Cui U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
693*01826a49SYabin Cui if ((matchIndex3 >= matchLow)
694*01826a49SYabin Cui & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
695*01826a49SYabin Cui size_t mlen;
696*01826a49SYabin Cui if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
697*01826a49SYabin Cui const BYTE* const match = base + matchIndex3;
698*01826a49SYabin Cui mlen = ZSTD_count(ip, match, iLimit);
699*01826a49SYabin Cui } else {
700*01826a49SYabin Cui const BYTE* const match = dictBase + matchIndex3;
701*01826a49SYabin Cui mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
702*01826a49SYabin Cui }
703*01826a49SYabin Cui
704*01826a49SYabin Cui /* save best solution */
705*01826a49SYabin Cui if (mlen >= mls /* == 3 > bestLength */) {
706*01826a49SYabin Cui DEBUGLOG(8, "found small match with hlog3, of length %u",
707*01826a49SYabin Cui (U32)mlen);
708*01826a49SYabin Cui bestLength = mlen;
709*01826a49SYabin Cui assert(curr > matchIndex3);
710*01826a49SYabin Cui assert(mnum==0); /* no prior solution */
711*01826a49SYabin Cui matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
712*01826a49SYabin Cui matches[0].len = (U32)mlen;
713*01826a49SYabin Cui mnum = 1;
714*01826a49SYabin Cui if ( (mlen > sufficient_len) |
715*01826a49SYabin Cui (ip+mlen == iLimit) ) { /* best possible length */
716*01826a49SYabin Cui ms->nextToUpdate = curr+1; /* skip insertion */
717*01826a49SYabin Cui return 1;
718*01826a49SYabin Cui } } }
719*01826a49SYabin Cui /* no dictMatchState lookup: dicts don't have a populated HC3 table */
720*01826a49SYabin Cui } /* if (mls == 3) */
721*01826a49SYabin Cui
722*01826a49SYabin Cui hashTable[h] = curr; /* Update Hash Table */
723*01826a49SYabin Cui
724*01826a49SYabin Cui for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
725*01826a49SYabin Cui U32* const nextPtr = bt + 2*(matchIndex & btMask);
726*01826a49SYabin Cui const BYTE* match;
727*01826a49SYabin Cui size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
728*01826a49SYabin Cui assert(curr > matchIndex);
729*01826a49SYabin Cui
730*01826a49SYabin Cui if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
731*01826a49SYabin Cui assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
732*01826a49SYabin Cui match = base + matchIndex;
733*01826a49SYabin Cui if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
734*01826a49SYabin Cui matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
735*01826a49SYabin Cui } else {
736*01826a49SYabin Cui match = dictBase + matchIndex;
737*01826a49SYabin Cui assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
738*01826a49SYabin Cui matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
739*01826a49SYabin Cui if (matchIndex+matchLength >= dictLimit)
740*01826a49SYabin Cui match = base + matchIndex; /* prepare for match[matchLength] read */
741*01826a49SYabin Cui }
742*01826a49SYabin Cui
743*01826a49SYabin Cui if (matchLength > bestLength) {
744*01826a49SYabin Cui DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
745*01826a49SYabin Cui (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
746*01826a49SYabin Cui assert(matchEndIdx > matchIndex);
747*01826a49SYabin Cui if (matchLength > matchEndIdx - matchIndex)
748*01826a49SYabin Cui matchEndIdx = matchIndex + (U32)matchLength;
749*01826a49SYabin Cui bestLength = matchLength;
750*01826a49SYabin Cui matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
751*01826a49SYabin Cui matches[mnum].len = (U32)matchLength;
752*01826a49SYabin Cui mnum++;
753*01826a49SYabin Cui if ( (matchLength > ZSTD_OPT_NUM)
754*01826a49SYabin Cui | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
755*01826a49SYabin Cui if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
756*01826a49SYabin Cui break; /* drop, to preserve bt consistency (miss a little bit of compression) */
757*01826a49SYabin Cui } }
758*01826a49SYabin Cui
759*01826a49SYabin Cui if (match[matchLength] < ip[matchLength]) {
760*01826a49SYabin Cui /* match smaller than current */
761*01826a49SYabin Cui *smallerPtr = matchIndex; /* update smaller idx */
762*01826a49SYabin Cui commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
763*01826a49SYabin Cui if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
764*01826a49SYabin Cui smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
765*01826a49SYabin Cui matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
766*01826a49SYabin Cui } else {
767*01826a49SYabin Cui *largerPtr = matchIndex;
768*01826a49SYabin Cui commonLengthLarger = matchLength;
769*01826a49SYabin Cui if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
770*01826a49SYabin Cui largerPtr = nextPtr;
771*01826a49SYabin Cui matchIndex = nextPtr[0];
772*01826a49SYabin Cui } }
773*01826a49SYabin Cui
774*01826a49SYabin Cui *smallerPtr = *largerPtr = 0;
775*01826a49SYabin Cui
776*01826a49SYabin Cui assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
777*01826a49SYabin Cui if (dictMode == ZSTD_dictMatchState && nbCompares) {
778*01826a49SYabin Cui size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
779*01826a49SYabin Cui U32 dictMatchIndex = dms->hashTable[dmsH];
780*01826a49SYabin Cui const U32* const dmsBt = dms->chainTable;
781*01826a49SYabin Cui commonLengthSmaller = commonLengthLarger = 0;
782*01826a49SYabin Cui for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
783*01826a49SYabin Cui const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
784*01826a49SYabin Cui size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
785*01826a49SYabin Cui const BYTE* match = dmsBase + dictMatchIndex;
786*01826a49SYabin Cui matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
787*01826a49SYabin Cui if (dictMatchIndex+matchLength >= dmsHighLimit)
788*01826a49SYabin Cui match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
789*01826a49SYabin Cui
790*01826a49SYabin Cui if (matchLength > bestLength) {
791*01826a49SYabin Cui matchIndex = dictMatchIndex + dmsIndexDelta;
792*01826a49SYabin Cui DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
793*01826a49SYabin Cui (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
794*01826a49SYabin Cui if (matchLength > matchEndIdx - matchIndex)
795*01826a49SYabin Cui matchEndIdx = matchIndex + (U32)matchLength;
796*01826a49SYabin Cui bestLength = matchLength;
797*01826a49SYabin Cui matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
798*01826a49SYabin Cui matches[mnum].len = (U32)matchLength;
799*01826a49SYabin Cui mnum++;
800*01826a49SYabin Cui if ( (matchLength > ZSTD_OPT_NUM)
801*01826a49SYabin Cui | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
802*01826a49SYabin Cui break; /* drop, to guarantee consistency (miss a little bit of compression) */
803*01826a49SYabin Cui } }
804*01826a49SYabin Cui
805*01826a49SYabin Cui if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
806*01826a49SYabin Cui if (match[matchLength] < ip[matchLength]) {
807*01826a49SYabin Cui commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
808*01826a49SYabin Cui dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
809*01826a49SYabin Cui } else {
810*01826a49SYabin Cui /* match is larger than current */
811*01826a49SYabin Cui commonLengthLarger = matchLength;
812*01826a49SYabin Cui dictMatchIndex = nextPtr[0];
813*01826a49SYabin Cui } } } /* if (dictMode == ZSTD_dictMatchState) */
814*01826a49SYabin Cui
815*01826a49SYabin Cui assert(matchEndIdx > curr+8);
816*01826a49SYabin Cui ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
817*01826a49SYabin Cui return mnum;
818*01826a49SYabin Cui }
819*01826a49SYabin Cui
820*01826a49SYabin Cui typedef U32 (*ZSTD_getAllMatchesFn)(
821*01826a49SYabin Cui ZSTD_match_t*,
822*01826a49SYabin Cui ZSTD_matchState_t*,
823*01826a49SYabin Cui U32*,
824*01826a49SYabin Cui const BYTE*,
825*01826a49SYabin Cui const BYTE*,
826*01826a49SYabin Cui const U32 rep[ZSTD_REP_NUM],
827*01826a49SYabin Cui U32 const ll0,
828*01826a49SYabin Cui U32 const lengthToBeat);
829*01826a49SYabin Cui
830*01826a49SYabin Cui FORCE_INLINE_TEMPLATE
831*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_btGetAllMatches_internal(ZSTD_match_t * matches,ZSTD_matchState_t * ms,U32 * nextToUpdate3,const BYTE * ip,const BYTE * const iHighLimit,const U32 rep[ZSTD_REP_NUM],U32 const ll0,U32 const lengthToBeat,const ZSTD_dictMode_e dictMode,const U32 mls)832*01826a49SYabin Cui U32 ZSTD_btGetAllMatches_internal(
833*01826a49SYabin Cui ZSTD_match_t* matches,
834*01826a49SYabin Cui ZSTD_matchState_t* ms,
835*01826a49SYabin Cui U32* nextToUpdate3,
836*01826a49SYabin Cui const BYTE* ip,
837*01826a49SYabin Cui const BYTE* const iHighLimit,
838*01826a49SYabin Cui const U32 rep[ZSTD_REP_NUM],
839*01826a49SYabin Cui U32 const ll0,
840*01826a49SYabin Cui U32 const lengthToBeat,
841*01826a49SYabin Cui const ZSTD_dictMode_e dictMode,
842*01826a49SYabin Cui const U32 mls)
843*01826a49SYabin Cui {
844*01826a49SYabin Cui assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
845*01826a49SYabin Cui DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
846*01826a49SYabin Cui if (ip < ms->window.base + ms->nextToUpdate)
847*01826a49SYabin Cui return 0; /* skipped area */
848*01826a49SYabin Cui ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
849*01826a49SYabin Cui return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
850*01826a49SYabin Cui }
851*01826a49SYabin Cui
852*01826a49SYabin Cui #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
853*01826a49SYabin Cui
854*01826a49SYabin Cui #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
855*01826a49SYabin Cui static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
856*01826a49SYabin Cui ZSTD_match_t* matches, \
857*01826a49SYabin Cui ZSTD_matchState_t* ms, \
858*01826a49SYabin Cui U32* nextToUpdate3, \
859*01826a49SYabin Cui const BYTE* ip, \
860*01826a49SYabin Cui const BYTE* const iHighLimit, \
861*01826a49SYabin Cui const U32 rep[ZSTD_REP_NUM], \
862*01826a49SYabin Cui U32 const ll0, \
863*01826a49SYabin Cui U32 const lengthToBeat) \
864*01826a49SYabin Cui { \
865*01826a49SYabin Cui return ZSTD_btGetAllMatches_internal( \
866*01826a49SYabin Cui matches, ms, nextToUpdate3, ip, iHighLimit, \
867*01826a49SYabin Cui rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868*01826a49SYabin Cui }
869*01826a49SYabin Cui
870*01826a49SYabin Cui #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
871*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
872*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
873*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
874*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
875*01826a49SYabin Cui
876*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)877*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
878*01826a49SYabin Cui GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
879*01826a49SYabin Cui
880*01826a49SYabin Cui #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
881*01826a49SYabin Cui { \
882*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
883*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
884*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
885*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
886*01826a49SYabin Cui }
887*01826a49SYabin Cui
888*01826a49SYabin Cui static ZSTD_getAllMatchesFn
889*01826a49SYabin Cui ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
890*01826a49SYabin Cui {
891*01826a49SYabin Cui ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
892*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
893*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
894*01826a49SYabin Cui ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
895*01826a49SYabin Cui };
896*01826a49SYabin Cui U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
897*01826a49SYabin Cui assert((U32)dictMode < 3);
898*01826a49SYabin Cui assert(mls - 3 < 4);
899*01826a49SYabin Cui return getAllMatchesFns[(int)dictMode][mls - 3];
900*01826a49SYabin Cui }
901*01826a49SYabin Cui
902*01826a49SYabin Cui /*************************
903*01826a49SYabin Cui * LDM helper functions *
904*01826a49SYabin Cui *************************/
905*01826a49SYabin Cui
906*01826a49SYabin Cui /* Struct containing info needed to make decision about ldm inclusion */
907*01826a49SYabin Cui typedef struct {
908*01826a49SYabin Cui rawSeqStore_t seqStore; /* External match candidates store for this block */
909*01826a49SYabin Cui U32 startPosInBlock; /* Start position of the current match candidate */
910*01826a49SYabin Cui U32 endPosInBlock; /* End position of the current match candidate */
911*01826a49SYabin Cui U32 offset; /* Offset of the match candidate */
912*01826a49SYabin Cui } ZSTD_optLdm_t;
913*01826a49SYabin Cui
914*01826a49SYabin Cui /* ZSTD_optLdm_skipRawSeqStoreBytes():
915*01826a49SYabin Cui * Moves forward in @rawSeqStore by @nbBytes,
916*01826a49SYabin Cui * which will update the fields 'pos' and 'posInSequence'.
917*01826a49SYabin Cui */
ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t * rawSeqStore,size_t nbBytes)918*01826a49SYabin Cui static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
919*01826a49SYabin Cui {
920*01826a49SYabin Cui U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
921*01826a49SYabin Cui while (currPos && rawSeqStore->pos < rawSeqStore->size) {
922*01826a49SYabin Cui rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
923*01826a49SYabin Cui if (currPos >= currSeq.litLength + currSeq.matchLength) {
924*01826a49SYabin Cui currPos -= currSeq.litLength + currSeq.matchLength;
925*01826a49SYabin Cui rawSeqStore->pos++;
926*01826a49SYabin Cui } else {
927*01826a49SYabin Cui rawSeqStore->posInSequence = currPos;
928*01826a49SYabin Cui break;
929*01826a49SYabin Cui }
930*01826a49SYabin Cui }
931*01826a49SYabin Cui if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
932*01826a49SYabin Cui rawSeqStore->posInSequence = 0;
933*01826a49SYabin Cui }
934*01826a49SYabin Cui }
935*01826a49SYabin Cui
936*01826a49SYabin Cui /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
937*01826a49SYabin Cui * Calculates the beginning and end of the next match in the current block.
938*01826a49SYabin Cui * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
939*01826a49SYabin Cui */
940*01826a49SYabin Cui static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t * optLdm,U32 currPosInBlock,U32 blockBytesRemaining)941*01826a49SYabin Cui ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
942*01826a49SYabin Cui U32 blockBytesRemaining)
943*01826a49SYabin Cui {
944*01826a49SYabin Cui rawSeq currSeq;
945*01826a49SYabin Cui U32 currBlockEndPos;
946*01826a49SYabin Cui U32 literalsBytesRemaining;
947*01826a49SYabin Cui U32 matchBytesRemaining;
948*01826a49SYabin Cui
949*01826a49SYabin Cui /* Setting match end position to MAX to ensure we never use an LDM during this block */
950*01826a49SYabin Cui if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
951*01826a49SYabin Cui optLdm->startPosInBlock = UINT_MAX;
952*01826a49SYabin Cui optLdm->endPosInBlock = UINT_MAX;
953*01826a49SYabin Cui return;
954*01826a49SYabin Cui }
955*01826a49SYabin Cui /* Calculate appropriate bytes left in matchLength and litLength
956*01826a49SYabin Cui * after adjusting based on ldmSeqStore->posInSequence */
957*01826a49SYabin Cui currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
958*01826a49SYabin Cui assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
959*01826a49SYabin Cui currBlockEndPos = currPosInBlock + blockBytesRemaining;
960*01826a49SYabin Cui literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
961*01826a49SYabin Cui currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
962*01826a49SYabin Cui 0;
963*01826a49SYabin Cui matchBytesRemaining = (literalsBytesRemaining == 0) ?
964*01826a49SYabin Cui currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
965*01826a49SYabin Cui currSeq.matchLength;
966*01826a49SYabin Cui
967*01826a49SYabin Cui /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
968*01826a49SYabin Cui if (literalsBytesRemaining >= blockBytesRemaining) {
969*01826a49SYabin Cui optLdm->startPosInBlock = UINT_MAX;
970*01826a49SYabin Cui optLdm->endPosInBlock = UINT_MAX;
971*01826a49SYabin Cui ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
972*01826a49SYabin Cui return;
973*01826a49SYabin Cui }
974*01826a49SYabin Cui
975*01826a49SYabin Cui /* Matches may be < MINMATCH by this process. In that case, we will reject them
976*01826a49SYabin Cui when we are deciding whether or not to add the ldm */
977*01826a49SYabin Cui optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
978*01826a49SYabin Cui optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
979*01826a49SYabin Cui optLdm->offset = currSeq.offset;
980*01826a49SYabin Cui
981*01826a49SYabin Cui if (optLdm->endPosInBlock > currBlockEndPos) {
982*01826a49SYabin Cui /* Match ends after the block ends, we can't use the whole match */
983*01826a49SYabin Cui optLdm->endPosInBlock = currBlockEndPos;
984*01826a49SYabin Cui ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
985*01826a49SYabin Cui } else {
986*01826a49SYabin Cui /* Consume nb of bytes equal to size of sequence left */
987*01826a49SYabin Cui ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
988*01826a49SYabin Cui }
989*01826a49SYabin Cui }
990*01826a49SYabin Cui
991*01826a49SYabin Cui /* ZSTD_optLdm_maybeAddMatch():
992*01826a49SYabin Cui * Adds a match if it's long enough,
993*01826a49SYabin Cui * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
994*01826a49SYabin Cui * into 'matches'. Maintains the correct ordering of 'matches'.
995*01826a49SYabin Cui */
ZSTD_optLdm_maybeAddMatch(ZSTD_match_t * matches,U32 * nbMatches,const ZSTD_optLdm_t * optLdm,U32 currPosInBlock)996*01826a49SYabin Cui static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
997*01826a49SYabin Cui const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
998*01826a49SYabin Cui {
999*01826a49SYabin Cui U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1000*01826a49SYabin Cui /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1001*01826a49SYabin Cui U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1002*01826a49SYabin Cui
1003*01826a49SYabin Cui /* Ensure that current block position is not outside of the match */
1004*01826a49SYabin Cui if (currPosInBlock < optLdm->startPosInBlock
1005*01826a49SYabin Cui || currPosInBlock >= optLdm->endPosInBlock
1006*01826a49SYabin Cui || candidateMatchLength < MINMATCH) {
1007*01826a49SYabin Cui return;
1008*01826a49SYabin Cui }
1009*01826a49SYabin Cui
1010*01826a49SYabin Cui if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1011*01826a49SYabin Cui U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1012*01826a49SYabin Cui DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1013*01826a49SYabin Cui candidateOffBase, candidateMatchLength, currPosInBlock);
1014*01826a49SYabin Cui matches[*nbMatches].len = candidateMatchLength;
1015*01826a49SYabin Cui matches[*nbMatches].off = candidateOffBase;
1016*01826a49SYabin Cui (*nbMatches)++;
1017*01826a49SYabin Cui }
1018*01826a49SYabin Cui }
1019*01826a49SYabin Cui
1020*01826a49SYabin Cui /* ZSTD_optLdm_processMatchCandidate():
1021*01826a49SYabin Cui * Wrapper function to update ldm seq store and call ldm functions as necessary.
1022*01826a49SYabin Cui */
1023*01826a49SYabin Cui static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t * optLdm,ZSTD_match_t * matches,U32 * nbMatches,U32 currPosInBlock,U32 remainingBytes)1024*01826a49SYabin Cui ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1025*01826a49SYabin Cui ZSTD_match_t* matches, U32* nbMatches,
1026*01826a49SYabin Cui U32 currPosInBlock, U32 remainingBytes)
1027*01826a49SYabin Cui {
1028*01826a49SYabin Cui if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1029*01826a49SYabin Cui return;
1030*01826a49SYabin Cui }
1031*01826a49SYabin Cui
1032*01826a49SYabin Cui if (currPosInBlock >= optLdm->endPosInBlock) {
1033*01826a49SYabin Cui if (currPosInBlock > optLdm->endPosInBlock) {
1034*01826a49SYabin Cui /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1035*01826a49SYabin Cui * at the end of a match from the ldm seq store, and will often be some bytes
1036*01826a49SYabin Cui * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1037*01826a49SYabin Cui */
1038*01826a49SYabin Cui U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1039*01826a49SYabin Cui ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1040*01826a49SYabin Cui }
1041*01826a49SYabin Cui ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1042*01826a49SYabin Cui }
1043*01826a49SYabin Cui ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1044*01826a49SYabin Cui }
1045*01826a49SYabin Cui
1046*01826a49SYabin Cui
1047*01826a49SYabin Cui /*-*******************************
1048*01826a49SYabin Cui * Optimal parser
1049*01826a49SYabin Cui *********************************/
1050*01826a49SYabin Cui
1051*01826a49SYabin Cui #if 0 /* debug */
1052*01826a49SYabin Cui
1053*01826a49SYabin Cui static void
1054*01826a49SYabin Cui listStats(const U32* table, int lastEltID)
1055*01826a49SYabin Cui {
1056*01826a49SYabin Cui int const nbElts = lastEltID + 1;
1057*01826a49SYabin Cui int enb;
1058*01826a49SYabin Cui for (enb=0; enb < nbElts; enb++) {
1059*01826a49SYabin Cui (void)table;
1060*01826a49SYabin Cui /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
1061*01826a49SYabin Cui RAWLOG(2, "%4i,", table[enb]);
1062*01826a49SYabin Cui }
1063*01826a49SYabin Cui RAWLOG(2, " \n");
1064*01826a49SYabin Cui }
1065*01826a49SYabin Cui
1066*01826a49SYabin Cui #endif
1067*01826a49SYabin Cui
1068*01826a49SYabin Cui #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1069*01826a49SYabin Cui #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1070*01826a49SYabin Cui #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1071*01826a49SYabin Cui
1072*01826a49SYabin Cui FORCE_INLINE_TEMPLATE
1073*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1074*01826a49SYabin Cui size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const int optLevel,const ZSTD_dictMode_e dictMode)1075*01826a49SYabin Cui ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1076*01826a49SYabin Cui seqStore_t* seqStore,
1077*01826a49SYabin Cui U32 rep[ZSTD_REP_NUM],
1078*01826a49SYabin Cui const void* src, size_t srcSize,
1079*01826a49SYabin Cui const int optLevel,
1080*01826a49SYabin Cui const ZSTD_dictMode_e dictMode)
1081*01826a49SYabin Cui {
1082*01826a49SYabin Cui optState_t* const optStatePtr = &ms->opt;
1083*01826a49SYabin Cui const BYTE* const istart = (const BYTE*)src;
1084*01826a49SYabin Cui const BYTE* ip = istart;
1085*01826a49SYabin Cui const BYTE* anchor = istart;
1086*01826a49SYabin Cui const BYTE* const iend = istart + srcSize;
1087*01826a49SYabin Cui const BYTE* const ilimit = iend - 8;
1088*01826a49SYabin Cui const BYTE* const base = ms->window.base;
1089*01826a49SYabin Cui const BYTE* const prefixStart = base + ms->window.dictLimit;
1090*01826a49SYabin Cui const ZSTD_compressionParameters* const cParams = &ms->cParams;
1091*01826a49SYabin Cui
1092*01826a49SYabin Cui ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1093*01826a49SYabin Cui
1094*01826a49SYabin Cui U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1095*01826a49SYabin Cui U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1096*01826a49SYabin Cui U32 nextToUpdate3 = ms->nextToUpdate;
1097*01826a49SYabin Cui
1098*01826a49SYabin Cui ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1099*01826a49SYabin Cui ZSTD_match_t* const matches = optStatePtr->matchTable;
1100*01826a49SYabin Cui ZSTD_optimal_t lastStretch;
1101*01826a49SYabin Cui ZSTD_optLdm_t optLdm;
1102*01826a49SYabin Cui
1103*01826a49SYabin Cui ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1104*01826a49SYabin Cui
1105*01826a49SYabin Cui optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1106*01826a49SYabin Cui optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1107*01826a49SYabin Cui ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1108*01826a49SYabin Cui
1109*01826a49SYabin Cui /* init */
1110*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1111*01826a49SYabin Cui (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1112*01826a49SYabin Cui assert(optLevel <= 2);
1113*01826a49SYabin Cui ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1114*01826a49SYabin Cui ip += (ip==prefixStart);
1115*01826a49SYabin Cui
1116*01826a49SYabin Cui /* Match Loop */
1117*01826a49SYabin Cui while (ip < ilimit) {
1118*01826a49SYabin Cui U32 cur, last_pos = 0;
1119*01826a49SYabin Cui
1120*01826a49SYabin Cui /* find first match */
1121*01826a49SYabin Cui { U32 const litlen = (U32)(ip - anchor);
1122*01826a49SYabin Cui U32 const ll0 = !litlen;
1123*01826a49SYabin Cui U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1124*01826a49SYabin Cui ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1125*01826a49SYabin Cui (U32)(ip-istart), (U32)(iend-ip));
1126*01826a49SYabin Cui if (!nbMatches) {
1127*01826a49SYabin Cui DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1128*01826a49SYabin Cui ip++;
1129*01826a49SYabin Cui continue;
1130*01826a49SYabin Cui }
1131*01826a49SYabin Cui
1132*01826a49SYabin Cui /* Match found: let's store this solution, and eventually find more candidates.
1133*01826a49SYabin Cui * During this forward pass, @opt is used to store stretches,
1134*01826a49SYabin Cui * defined as "a match followed by N literals".
1135*01826a49SYabin Cui * Note how this is different from a Sequence, which is "N literals followed by a match".
1136*01826a49SYabin Cui * Storing stretches allows us to store different match predecessors
1137*01826a49SYabin Cui * for each literal position part of a literals run. */
1138*01826a49SYabin Cui
1139*01826a49SYabin Cui /* initialize opt[0] */
1140*01826a49SYabin Cui opt[0].mlen = 0; /* there are only literals so far */
1141*01826a49SYabin Cui opt[0].litlen = litlen;
1142*01826a49SYabin Cui /* No need to include the actual price of the literals before the first match
1143*01826a49SYabin Cui * because it is static for the duration of the forward pass, and is included
1144*01826a49SYabin Cui * in every subsequent price. But, we include the literal length because
1145*01826a49SYabin Cui * the cost variation of litlen depends on the value of litlen.
1146*01826a49SYabin Cui */
1147*01826a49SYabin Cui opt[0].price = LL_PRICE(litlen);
1148*01826a49SYabin Cui ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1149*01826a49SYabin Cui ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1150*01826a49SYabin Cui
1151*01826a49SYabin Cui /* large match -> immediate encoding */
1152*01826a49SYabin Cui { U32 const maxML = matches[nbMatches-1].len;
1153*01826a49SYabin Cui U32 const maxOffBase = matches[nbMatches-1].off;
1154*01826a49SYabin Cui DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1155*01826a49SYabin Cui nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1156*01826a49SYabin Cui
1157*01826a49SYabin Cui if (maxML > sufficient_len) {
1158*01826a49SYabin Cui lastStretch.litlen = 0;
1159*01826a49SYabin Cui lastStretch.mlen = maxML;
1160*01826a49SYabin Cui lastStretch.off = maxOffBase;
1161*01826a49SYabin Cui DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1162*01826a49SYabin Cui maxML, sufficient_len);
1163*01826a49SYabin Cui cur = 0;
1164*01826a49SYabin Cui last_pos = maxML;
1165*01826a49SYabin Cui goto _shortestPath;
1166*01826a49SYabin Cui } }
1167*01826a49SYabin Cui
1168*01826a49SYabin Cui /* set prices for first matches starting position == 0 */
1169*01826a49SYabin Cui assert(opt[0].price >= 0);
1170*01826a49SYabin Cui { U32 pos;
1171*01826a49SYabin Cui U32 matchNb;
1172*01826a49SYabin Cui for (pos = 1; pos < minMatch; pos++) {
1173*01826a49SYabin Cui opt[pos].price = ZSTD_MAX_PRICE;
1174*01826a49SYabin Cui opt[pos].mlen = 0;
1175*01826a49SYabin Cui opt[pos].litlen = litlen + pos;
1176*01826a49SYabin Cui }
1177*01826a49SYabin Cui for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1178*01826a49SYabin Cui U32 const offBase = matches[matchNb].off;
1179*01826a49SYabin Cui U32 const end = matches[matchNb].len;
1180*01826a49SYabin Cui for ( ; pos <= end ; pos++ ) {
1181*01826a49SYabin Cui int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1182*01826a49SYabin Cui int const sequencePrice = opt[0].price + matchPrice;
1183*01826a49SYabin Cui DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1184*01826a49SYabin Cui pos, ZSTD_fCost(sequencePrice));
1185*01826a49SYabin Cui opt[pos].mlen = pos;
1186*01826a49SYabin Cui opt[pos].off = offBase;
1187*01826a49SYabin Cui opt[pos].litlen = 0; /* end of match */
1188*01826a49SYabin Cui opt[pos].price = sequencePrice + LL_PRICE(0);
1189*01826a49SYabin Cui }
1190*01826a49SYabin Cui }
1191*01826a49SYabin Cui last_pos = pos-1;
1192*01826a49SYabin Cui opt[pos].price = ZSTD_MAX_PRICE;
1193*01826a49SYabin Cui }
1194*01826a49SYabin Cui }
1195*01826a49SYabin Cui
1196*01826a49SYabin Cui /* check further positions */
1197*01826a49SYabin Cui for (cur = 1; cur <= last_pos; cur++) {
1198*01826a49SYabin Cui const BYTE* const inr = ip + cur;
1199*01826a49SYabin Cui assert(cur <= ZSTD_OPT_NUM);
1200*01826a49SYabin Cui DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
1201*01826a49SYabin Cui
1202*01826a49SYabin Cui /* Fix current position with one literal if cheaper */
1203*01826a49SYabin Cui { U32 const litlen = opt[cur-1].litlen + 1;
1204*01826a49SYabin Cui int const price = opt[cur-1].price
1205*01826a49SYabin Cui + LIT_PRICE(ip+cur-1)
1206*01826a49SYabin Cui + LL_INCPRICE(litlen);
1207*01826a49SYabin Cui assert(price < 1000000000); /* overflow check */
1208*01826a49SYabin Cui if (price <= opt[cur].price) {
1209*01826a49SYabin Cui ZSTD_optimal_t const prevMatch = opt[cur];
1210*01826a49SYabin Cui DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1211*01826a49SYabin Cui inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1212*01826a49SYabin Cui opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1213*01826a49SYabin Cui opt[cur] = opt[cur-1];
1214*01826a49SYabin Cui opt[cur].litlen = litlen;
1215*01826a49SYabin Cui opt[cur].price = price;
1216*01826a49SYabin Cui if ( (optLevel >= 1) /* additional check only for higher modes */
1217*01826a49SYabin Cui && (prevMatch.litlen == 0) /* replace a match */
1218*01826a49SYabin Cui && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1219*01826a49SYabin Cui && LIKELY(ip + cur < iend)
1220*01826a49SYabin Cui ) {
1221*01826a49SYabin Cui /* check next position, in case it would be cheaper */
1222*01826a49SYabin Cui int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1223*01826a49SYabin Cui int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1224*01826a49SYabin Cui DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1225*01826a49SYabin Cui cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1226*01826a49SYabin Cui if ( (with1literal < withMoreLiterals)
1227*01826a49SYabin Cui && (with1literal < opt[cur+1].price) ) {
1228*01826a49SYabin Cui /* update offset history - before it disappears */
1229*01826a49SYabin Cui U32 const prev = cur - prevMatch.mlen;
1230*01826a49SYabin Cui repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1231*01826a49SYabin Cui assert(cur >= prevMatch.mlen);
1232*01826a49SYabin Cui DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1233*01826a49SYabin Cui ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1234*01826a49SYabin Cui newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1235*01826a49SYabin Cui opt[cur+1] = prevMatch; /* mlen & offbase */
1236*01826a49SYabin Cui ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
1237*01826a49SYabin Cui opt[cur+1].litlen = 1;
1238*01826a49SYabin Cui opt[cur+1].price = with1literal;
1239*01826a49SYabin Cui if (last_pos < cur+1) last_pos = cur+1;
1240*01826a49SYabin Cui }
1241*01826a49SYabin Cui }
1242*01826a49SYabin Cui } else {
1243*01826a49SYabin Cui DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
1244*01826a49SYabin Cui inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1245*01826a49SYabin Cui }
1246*01826a49SYabin Cui }
1247*01826a49SYabin Cui
1248*01826a49SYabin Cui /* Offset history is not updated during match comparison.
1249*01826a49SYabin Cui * Do it here, now that the match is selected and confirmed.
1250*01826a49SYabin Cui */
1251*01826a49SYabin Cui ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1252*01826a49SYabin Cui assert(cur >= opt[cur].mlen);
1253*01826a49SYabin Cui if (opt[cur].litlen == 0) {
1254*01826a49SYabin Cui /* just finished a match => alter offset history */
1255*01826a49SYabin Cui U32 const prev = cur - opt[cur].mlen;
1256*01826a49SYabin Cui repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1257*01826a49SYabin Cui ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1258*01826a49SYabin Cui }
1259*01826a49SYabin Cui
1260*01826a49SYabin Cui /* last match must start at a minimum distance of 8 from oend */
1261*01826a49SYabin Cui if (inr > ilimit) continue;
1262*01826a49SYabin Cui
1263*01826a49SYabin Cui if (cur == last_pos) break;
1264*01826a49SYabin Cui
1265*01826a49SYabin Cui if ( (optLevel==0) /*static_test*/
1266*01826a49SYabin Cui && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1267*01826a49SYabin Cui DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1268*01826a49SYabin Cui continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1269*01826a49SYabin Cui }
1270*01826a49SYabin Cui
1271*01826a49SYabin Cui assert(opt[cur].price >= 0);
1272*01826a49SYabin Cui { U32 const ll0 = (opt[cur].litlen == 0);
1273*01826a49SYabin Cui int const previousPrice = opt[cur].price;
1274*01826a49SYabin Cui int const basePrice = previousPrice + LL_PRICE(0);
1275*01826a49SYabin Cui U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1276*01826a49SYabin Cui U32 matchNb;
1277*01826a49SYabin Cui
1278*01826a49SYabin Cui ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1279*01826a49SYabin Cui (U32)(inr-istart), (U32)(iend-inr));
1280*01826a49SYabin Cui
1281*01826a49SYabin Cui if (!nbMatches) {
1282*01826a49SYabin Cui DEBUGLOG(7, "rPos:%u : no match found", cur);
1283*01826a49SYabin Cui continue;
1284*01826a49SYabin Cui }
1285*01826a49SYabin Cui
1286*01826a49SYabin Cui { U32 const longestML = matches[nbMatches-1].len;
1287*01826a49SYabin Cui DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
1288*01826a49SYabin Cui inr-istart, cur, nbMatches, longestML);
1289*01826a49SYabin Cui
1290*01826a49SYabin Cui if ( (longestML > sufficient_len)
1291*01826a49SYabin Cui || (cur + longestML >= ZSTD_OPT_NUM)
1292*01826a49SYabin Cui || (ip + cur + longestML >= iend) ) {
1293*01826a49SYabin Cui lastStretch.mlen = longestML;
1294*01826a49SYabin Cui lastStretch.off = matches[nbMatches-1].off;
1295*01826a49SYabin Cui lastStretch.litlen = 0;
1296*01826a49SYabin Cui last_pos = cur + longestML;
1297*01826a49SYabin Cui goto _shortestPath;
1298*01826a49SYabin Cui } }
1299*01826a49SYabin Cui
1300*01826a49SYabin Cui /* set prices using matches found at position == cur */
1301*01826a49SYabin Cui for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1302*01826a49SYabin Cui U32 const offset = matches[matchNb].off;
1303*01826a49SYabin Cui U32 const lastML = matches[matchNb].len;
1304*01826a49SYabin Cui U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1305*01826a49SYabin Cui U32 mlen;
1306*01826a49SYabin Cui
1307*01826a49SYabin Cui DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1308*01826a49SYabin Cui matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1309*01826a49SYabin Cui
1310*01826a49SYabin Cui for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1311*01826a49SYabin Cui U32 const pos = cur + mlen;
1312*01826a49SYabin Cui int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1313*01826a49SYabin Cui
1314*01826a49SYabin Cui if ((pos > last_pos) || (price < opt[pos].price)) {
1315*01826a49SYabin Cui DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1316*01826a49SYabin Cui pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1317*01826a49SYabin Cui while (last_pos < pos) {
1318*01826a49SYabin Cui /* fill empty positions, for future comparisons */
1319*01826a49SYabin Cui last_pos++;
1320*01826a49SYabin Cui opt[last_pos].price = ZSTD_MAX_PRICE;
1321*01826a49SYabin Cui opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
1322*01826a49SYabin Cui }
1323*01826a49SYabin Cui opt[pos].mlen = mlen;
1324*01826a49SYabin Cui opt[pos].off = offset;
1325*01826a49SYabin Cui opt[pos].litlen = 0;
1326*01826a49SYabin Cui opt[pos].price = price;
1327*01826a49SYabin Cui } else {
1328*01826a49SYabin Cui DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1329*01826a49SYabin Cui pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1330*01826a49SYabin Cui if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1331*01826a49SYabin Cui }
1332*01826a49SYabin Cui } } }
1333*01826a49SYabin Cui opt[last_pos+1].price = ZSTD_MAX_PRICE;
1334*01826a49SYabin Cui } /* for (cur = 1; cur <= last_pos; cur++) */
1335*01826a49SYabin Cui
1336*01826a49SYabin Cui lastStretch = opt[last_pos];
1337*01826a49SYabin Cui assert(cur >= lastStretch.mlen);
1338*01826a49SYabin Cui cur = last_pos - lastStretch.mlen;
1339*01826a49SYabin Cui
1340*01826a49SYabin Cui _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1341*01826a49SYabin Cui assert(opt[0].mlen == 0);
1342*01826a49SYabin Cui assert(last_pos >= lastStretch.mlen);
1343*01826a49SYabin Cui assert(cur == last_pos - lastStretch.mlen);
1344*01826a49SYabin Cui
1345*01826a49SYabin Cui if (lastStretch.mlen==0) {
1346*01826a49SYabin Cui /* no solution : all matches have been converted into literals */
1347*01826a49SYabin Cui assert(lastStretch.litlen == (ip - anchor) + last_pos);
1348*01826a49SYabin Cui ip += last_pos;
1349*01826a49SYabin Cui continue;
1350*01826a49SYabin Cui }
1351*01826a49SYabin Cui assert(lastStretch.off > 0);
1352*01826a49SYabin Cui
1353*01826a49SYabin Cui /* Update offset history */
1354*01826a49SYabin Cui if (lastStretch.litlen == 0) {
1355*01826a49SYabin Cui /* finishing on a match : update offset history */
1356*01826a49SYabin Cui repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1357*01826a49SYabin Cui ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
1358*01826a49SYabin Cui } else {
1359*01826a49SYabin Cui ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
1360*01826a49SYabin Cui assert(cur >= lastStretch.litlen);
1361*01826a49SYabin Cui cur -= lastStretch.litlen;
1362*01826a49SYabin Cui }
1363*01826a49SYabin Cui
1364*01826a49SYabin Cui /* Let's write the shortest path solution.
1365*01826a49SYabin Cui * It is stored in @opt in reverse order,
1366*01826a49SYabin Cui * starting from @storeEnd (==cur+2),
1367*01826a49SYabin Cui * effectively partially @opt overwriting.
1368*01826a49SYabin Cui * Content is changed too:
1369*01826a49SYabin Cui * - So far, @opt stored stretches, aka a match followed by literals
1370*01826a49SYabin Cui * - Now, it will store sequences, aka literals followed by a match
1371*01826a49SYabin Cui */
1372*01826a49SYabin Cui { U32 const storeEnd = cur + 2;
1373*01826a49SYabin Cui U32 storeStart = storeEnd;
1374*01826a49SYabin Cui U32 stretchPos = cur;
1375*01826a49SYabin Cui
1376*01826a49SYabin Cui DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1377*01826a49SYabin Cui last_pos, cur); (void)last_pos;
1378*01826a49SYabin Cui assert(storeEnd < ZSTD_OPT_SIZE);
1379*01826a49SYabin Cui DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1380*01826a49SYabin Cui storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1381*01826a49SYabin Cui if (lastStretch.litlen > 0) {
1382*01826a49SYabin Cui /* last "sequence" is unfinished: just a bunch of literals */
1383*01826a49SYabin Cui opt[storeEnd].litlen = lastStretch.litlen;
1384*01826a49SYabin Cui opt[storeEnd].mlen = 0;
1385*01826a49SYabin Cui storeStart = storeEnd-1;
1386*01826a49SYabin Cui opt[storeStart] = lastStretch;
1387*01826a49SYabin Cui } {
1388*01826a49SYabin Cui opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
1389*01826a49SYabin Cui storeStart = storeEnd;
1390*01826a49SYabin Cui }
1391*01826a49SYabin Cui while (1) {
1392*01826a49SYabin Cui ZSTD_optimal_t nextStretch = opt[stretchPos];
1393*01826a49SYabin Cui opt[storeStart].litlen = nextStretch.litlen;
1394*01826a49SYabin Cui DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1395*01826a49SYabin Cui opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1396*01826a49SYabin Cui if (nextStretch.mlen == 0) {
1397*01826a49SYabin Cui /* reaching beginning of segment */
1398*01826a49SYabin Cui break;
1399*01826a49SYabin Cui }
1400*01826a49SYabin Cui storeStart--;
1401*01826a49SYabin Cui opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1402*01826a49SYabin Cui assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1403*01826a49SYabin Cui stretchPos -= nextStretch.litlen + nextStretch.mlen;
1404*01826a49SYabin Cui }
1405*01826a49SYabin Cui
1406*01826a49SYabin Cui /* save sequences */
1407*01826a49SYabin Cui DEBUGLOG(6, "sending selected sequences into seqStore");
1408*01826a49SYabin Cui { U32 storePos;
1409*01826a49SYabin Cui for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1410*01826a49SYabin Cui U32 const llen = opt[storePos].litlen;
1411*01826a49SYabin Cui U32 const mlen = opt[storePos].mlen;
1412*01826a49SYabin Cui U32 const offBase = opt[storePos].off;
1413*01826a49SYabin Cui U32 const advance = llen + mlen;
1414*01826a49SYabin Cui DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1415*01826a49SYabin Cui anchor - istart, (unsigned)llen, (unsigned)mlen);
1416*01826a49SYabin Cui
1417*01826a49SYabin Cui if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1418*01826a49SYabin Cui assert(storePos == storeEnd); /* must be last sequence */
1419*01826a49SYabin Cui ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1420*01826a49SYabin Cui continue; /* will finish */
1421*01826a49SYabin Cui }
1422*01826a49SYabin Cui
1423*01826a49SYabin Cui assert(anchor + llen <= iend);
1424*01826a49SYabin Cui ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1425*01826a49SYabin Cui ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1426*01826a49SYabin Cui anchor += advance;
1427*01826a49SYabin Cui ip = anchor;
1428*01826a49SYabin Cui } }
1429*01826a49SYabin Cui DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1430*01826a49SYabin Cui
1431*01826a49SYabin Cui /* update all costs */
1432*01826a49SYabin Cui ZSTD_setBasePrices(optStatePtr, optLevel);
1433*01826a49SYabin Cui }
1434*01826a49SYabin Cui } /* while (ip < ilimit) */
1435*01826a49SYabin Cui
1436*01826a49SYabin Cui /* Return the last literals size */
1437*01826a49SYabin Cui return (size_t)(iend - anchor);
1438*01826a49SYabin Cui }
1439*01826a49SYabin Cui #endif /* build exclusions */
1440*01826a49SYabin Cui
1441*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
ZSTD_compressBlock_opt0(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const ZSTD_dictMode_e dictMode)1442*01826a49SYabin Cui static size_t ZSTD_compressBlock_opt0(
1443*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1444*01826a49SYabin Cui const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1445*01826a49SYabin Cui {
1446*01826a49SYabin Cui return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1447*01826a49SYabin Cui }
1448*01826a49SYabin Cui #endif
1449*01826a49SYabin Cui
1450*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
ZSTD_compressBlock_opt2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const ZSTD_dictMode_e dictMode)1451*01826a49SYabin Cui static size_t ZSTD_compressBlock_opt2(
1452*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1453*01826a49SYabin Cui const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1454*01826a49SYabin Cui {
1455*01826a49SYabin Cui return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1456*01826a49SYabin Cui }
1457*01826a49SYabin Cui #endif
1458*01826a49SYabin Cui
1459*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
ZSTD_compressBlock_btopt(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1460*01826a49SYabin Cui size_t ZSTD_compressBlock_btopt(
1461*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1462*01826a49SYabin Cui const void* src, size_t srcSize)
1463*01826a49SYabin Cui {
1464*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1465*01826a49SYabin Cui return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1466*01826a49SYabin Cui }
1467*01826a49SYabin Cui #endif
1468*01826a49SYabin Cui
1469*01826a49SYabin Cui
1470*01826a49SYabin Cui
1471*01826a49SYabin Cui
1472*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1473*01826a49SYabin Cui /* ZSTD_initStats_ultra():
1474*01826a49SYabin Cui * make a first compression pass, just to seed stats with more accurate starting values.
1475*01826a49SYabin Cui * only works on first block, with no dictionary and no ldm.
1476*01826a49SYabin Cui * this function cannot error out, its narrow contract must be respected.
1477*01826a49SYabin Cui */
1478*01826a49SYabin Cui static
1479*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_initStats_ultra(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1480*01826a49SYabin Cui void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1481*01826a49SYabin Cui seqStore_t* seqStore,
1482*01826a49SYabin Cui U32 rep[ZSTD_REP_NUM],
1483*01826a49SYabin Cui const void* src, size_t srcSize)
1484*01826a49SYabin Cui {
1485*01826a49SYabin Cui U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1486*01826a49SYabin Cui ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1487*01826a49SYabin Cui
1488*01826a49SYabin Cui DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1489*01826a49SYabin Cui assert(ms->opt.litLengthSum == 0); /* first block */
1490*01826a49SYabin Cui assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1491*01826a49SYabin Cui assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1492*01826a49SYabin Cui assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1493*01826a49SYabin Cui
1494*01826a49SYabin Cui ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1495*01826a49SYabin Cui
1496*01826a49SYabin Cui /* invalidate first scan from history, only keep entropy stats */
1497*01826a49SYabin Cui ZSTD_resetSeqStore(seqStore);
1498*01826a49SYabin Cui ms->window.base -= srcSize;
1499*01826a49SYabin Cui ms->window.dictLimit += (U32)srcSize;
1500*01826a49SYabin Cui ms->window.lowLimit = ms->window.dictLimit;
1501*01826a49SYabin Cui ms->nextToUpdate = ms->window.dictLimit;
1502*01826a49SYabin Cui
1503*01826a49SYabin Cui }
1504*01826a49SYabin Cui
ZSTD_compressBlock_btultra(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1505*01826a49SYabin Cui size_t ZSTD_compressBlock_btultra(
1506*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1507*01826a49SYabin Cui const void* src, size_t srcSize)
1508*01826a49SYabin Cui {
1509*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1510*01826a49SYabin Cui return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1511*01826a49SYabin Cui }
1512*01826a49SYabin Cui
ZSTD_compressBlock_btultra2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1513*01826a49SYabin Cui size_t ZSTD_compressBlock_btultra2(
1514*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1515*01826a49SYabin Cui const void* src, size_t srcSize)
1516*01826a49SYabin Cui {
1517*01826a49SYabin Cui U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1518*01826a49SYabin Cui DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1519*01826a49SYabin Cui
1520*01826a49SYabin Cui /* 2-passes strategy:
1521*01826a49SYabin Cui * this strategy makes a first pass over first block to collect statistics
1522*01826a49SYabin Cui * in order to seed next round's statistics with it.
1523*01826a49SYabin Cui * After 1st pass, function forgets history, and starts a new block.
1524*01826a49SYabin Cui * Consequently, this can only work if no data has been previously loaded in tables,
1525*01826a49SYabin Cui * aka, no dictionary, no prefix, no ldm preprocessing.
1526*01826a49SYabin Cui * The compression ratio gain is generally small (~0.5% on first block),
1527*01826a49SYabin Cui * the cost is 2x cpu time on first block. */
1528*01826a49SYabin Cui assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1529*01826a49SYabin Cui if ( (ms->opt.litLengthSum==0) /* first block */
1530*01826a49SYabin Cui && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1531*01826a49SYabin Cui && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1532*01826a49SYabin Cui && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1533*01826a49SYabin Cui && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1534*01826a49SYabin Cui ) {
1535*01826a49SYabin Cui ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1536*01826a49SYabin Cui }
1537*01826a49SYabin Cui
1538*01826a49SYabin Cui return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1539*01826a49SYabin Cui }
1540*01826a49SYabin Cui #endif
1541*01826a49SYabin Cui
1542*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
ZSTD_compressBlock_btopt_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1543*01826a49SYabin Cui size_t ZSTD_compressBlock_btopt_dictMatchState(
1544*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1545*01826a49SYabin Cui const void* src, size_t srcSize)
1546*01826a49SYabin Cui {
1547*01826a49SYabin Cui return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1548*01826a49SYabin Cui }
1549*01826a49SYabin Cui
ZSTD_compressBlock_btopt_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1550*01826a49SYabin Cui size_t ZSTD_compressBlock_btopt_extDict(
1551*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1552*01826a49SYabin Cui const void* src, size_t srcSize)
1553*01826a49SYabin Cui {
1554*01826a49SYabin Cui return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1555*01826a49SYabin Cui }
1556*01826a49SYabin Cui #endif
1557*01826a49SYabin Cui
1558*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
ZSTD_compressBlock_btultra_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1559*01826a49SYabin Cui size_t ZSTD_compressBlock_btultra_dictMatchState(
1560*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1561*01826a49SYabin Cui const void* src, size_t srcSize)
1562*01826a49SYabin Cui {
1563*01826a49SYabin Cui return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1564*01826a49SYabin Cui }
1565*01826a49SYabin Cui
ZSTD_compressBlock_btultra_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1566*01826a49SYabin Cui size_t ZSTD_compressBlock_btultra_extDict(
1567*01826a49SYabin Cui ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1568*01826a49SYabin Cui const void* src, size_t srcSize)
1569*01826a49SYabin Cui {
1570*01826a49SYabin Cui return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1571*01826a49SYabin Cui }
1572*01826a49SYabin Cui #endif
1573*01826a49SYabin Cui
1574*01826a49SYabin Cui /* note : no btultra2 variant for extDict nor dictMatchState,
1575*01826a49SYabin Cui * because btultra2 is not meant to work with dictionaries
1576*01826a49SYabin Cui * and is only specific for the first block (no prefix) */
1577