1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
11*01826a49SYabin Cui
12*01826a49SYabin Cui /*-************************************
13*01826a49SYabin Cui * Dependencies
14*01826a49SYabin Cui **************************************/
15*01826a49SYabin Cui #include "util.h" /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */
16*01826a49SYabin Cui #include <stdlib.h> /* malloc */
17*01826a49SYabin Cui #include <stdio.h> /* fprintf, fopen, ftello64 */
18*01826a49SYabin Cui #include <string.h> /* strcmp */
19*01826a49SYabin Cui #include <math.h> /* log */
20*01826a49SYabin Cui #include <assert.h>
21*01826a49SYabin Cui
22*01826a49SYabin Cui #include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */
23*01826a49SYabin Cui #include "mem.h"
24*01826a49SYabin Cui #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_estimateCCtxSize */
25*01826a49SYabin Cui #include "zstd.h"
26*01826a49SYabin Cui #include "datagen.h"
27*01826a49SYabin Cui #include "xxhash.h"
28*01826a49SYabin Cui #include "benchfn.h"
29*01826a49SYabin Cui #include "benchzstd.h"
30*01826a49SYabin Cui #include "zstd_errors.h"
31*01826a49SYabin Cui #include "zstd_internal.h" /* should not be needed */
32*01826a49SYabin Cui
33*01826a49SYabin Cui
34*01826a49SYabin Cui /*-************************************
35*01826a49SYabin Cui * Constants
36*01826a49SYabin Cui **************************************/
37*01826a49SYabin Cui #define PROGRAM_DESCRIPTION "ZSTD parameters tester"
38*01826a49SYabin Cui #define AUTHOR "Yann Collet"
39*01826a49SYabin Cui #define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR
40*01826a49SYabin Cui
41*01826a49SYabin Cui #define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
42*01826a49SYabin Cui #define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */
43*01826a49SYabin Cui
44*01826a49SYabin Cui static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
45*01826a49SYabin Cui
46*01826a49SYabin Cui #define COMPRESSIBILITY_DEFAULT 0.50
47*01826a49SYabin Cui
48*01826a49SYabin Cui static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
49*01826a49SYabin Cui static const int g_maxNbVariations = 64;
50*01826a49SYabin Cui
51*01826a49SYabin Cui
52*01826a49SYabin Cui /*-************************************
53*01826a49SYabin Cui * Macros
54*01826a49SYabin Cui **************************************/
/* Console output helpers : everything is written to stderr.
 * DISPLAYLEVEL expands to a bare `if` statement and DEBUGOUTPUT to a brace block. */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); }
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

#define TIMED 0
#ifndef DEBUG
#  define DEBUG 0
#endif

/* Local MIN / MAX override any earlier definition.
 * Note : the selected argument is evaluated twice. */
#undef MIN
#undef MAX
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
#define CUSTOM_LEVEL 99
#define BASE_CLEVEL 1

/* Bounds of forceAttachDict once stored in a U32 : (U32)-1 stands for -1 */
#define FADT_MIN 0
#define FADT_MAX ((U32)-1)

/* Number of distinct values each parameter can take */
#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1)
#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1)
#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1)
#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1)
#define MML_RANGE (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1)
#define TLEN_RANGE 17   /* size of tlen_table */
#define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1)
#define FADT_RANGE 3

/* Time-budget checks : once more than g_timeLimit_s seconds have elapsed since g_time,
 * CHECKTIME returns `r` from the enclosing function,
 * CHECKTIMEGT stores `val` into `ret` and jumps to `_gototag`. */
#define CHECKTIME(r) { \
    if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { \
        DEBUGOUTPUT("Time Limit Reached\n"); \
        return r; \
    } }
#define CHECKTIMEGT(ret, val, _gototag) { \
    if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { \
        DEBUGOUTPUT("Time Limit Reached\n"); \
        ret = val; \
        goto _gototag; \
    } }

#define PARAM_UNSET ((U32)-2) /* can't be -1 b/c fadt uses -1 */
87*01826a49SYabin Cui
88*01826a49SYabin Cui static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = {
89*01826a49SYabin Cui "(none) ", "ZSTD_fast ", "ZSTD_dfast ",
90*01826a49SYabin Cui "ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
91*01826a49SYabin Cui "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra ",
92*01826a49SYabin Cui "ZSTD_btultra2"};
93*01826a49SYabin Cui
94*01826a49SYabin Cui static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
95*01826a49SYabin Cui
96*01826a49SYabin Cui
97*01826a49SYabin Cui /*-************************************
98*01826a49SYabin Cui * Setup for Adding new params
99*01826a49SYabin Cui **************************************/
100*01826a49SYabin Cui
101*01826a49SYabin Cui /* indices for each of the variables */
102*01826a49SYabin Cui typedef enum {
103*01826a49SYabin Cui wlog_ind = 0,
104*01826a49SYabin Cui clog_ind = 1,
105*01826a49SYabin Cui hlog_ind = 2,
106*01826a49SYabin Cui slog_ind = 3,
107*01826a49SYabin Cui mml_ind = 4,
108*01826a49SYabin Cui tlen_ind = 5,
109*01826a49SYabin Cui strt_ind = 6,
110*01826a49SYabin Cui fadt_ind = 7, /* forceAttachDict */
111*01826a49SYabin Cui NUM_PARAMS = 8
112*01826a49SYabin Cui } varInds_t;
113*01826a49SYabin Cui
114*01826a49SYabin Cui typedef struct {
115*01826a49SYabin Cui U32 vals[NUM_PARAMS];
116*01826a49SYabin Cui } paramValues_t;
117*01826a49SYabin Cui
118*01826a49SYabin Cui /* minimum value of parameters */
119*01826a49SYabin Cui static const U32 mintable[NUM_PARAMS] =
120*01826a49SYabin Cui { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN };
121*01826a49SYabin Cui
122*01826a49SYabin Cui /* maximum value of parameters */
123*01826a49SYabin Cui static const U32 maxtable[NUM_PARAMS] =
124*01826a49SYabin Cui { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX };
125*01826a49SYabin Cui
126*01826a49SYabin Cui /* # of values parameters can take on */
127*01826a49SYabin Cui static const U32 rangetable[NUM_PARAMS] =
128*01826a49SYabin Cui { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE };
129*01826a49SYabin Cui
130*01826a49SYabin Cui /* ZSTD_cctxSetParameter() index to set */
131*01826a49SYabin Cui static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] =
132*01826a49SYabin Cui { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict };
133*01826a49SYabin Cui
134*01826a49SYabin Cui /* names of parameters */
135*01826a49SYabin Cui static const char* g_paramNames[NUM_PARAMS] =
136*01826a49SYabin Cui { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" };
137*01826a49SYabin Cui
138*01826a49SYabin Cui /* shortened names of parameters */
139*01826a49SYabin Cui static const char* g_shortParamNames[NUM_PARAMS] =
140*01826a49SYabin Cui { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" };
141*01826a49SYabin Cui
142*01826a49SYabin Cui /* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */
rangeMap(varInds_t param,int ind)143*01826a49SYabin Cui static U32 rangeMap(varInds_t param, int ind)
144*01826a49SYabin Cui {
145*01826a49SYabin Cui U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0);
146*01826a49SYabin Cui switch(param) {
147*01826a49SYabin Cui case wlog_ind: /* using default: triggers -Wswitch-enum */
148*01826a49SYabin Cui case clog_ind:
149*01826a49SYabin Cui case hlog_ind:
150*01826a49SYabin Cui case slog_ind:
151*01826a49SYabin Cui case mml_ind:
152*01826a49SYabin Cui case strt_ind:
153*01826a49SYabin Cui return mintable[param] + uind;
154*01826a49SYabin Cui case tlen_ind:
155*01826a49SYabin Cui return tlen_table[uind];
156*01826a49SYabin Cui case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */
157*01826a49SYabin Cui return uind - 1;
158*01826a49SYabin Cui case NUM_PARAMS:
159*01826a49SYabin Cui default:;
160*01826a49SYabin Cui }
161*01826a49SYabin Cui DISPLAY("Error, not a valid param\n ");
162*01826a49SYabin Cui assert(0);
163*01826a49SYabin Cui return (U32)-1;
164*01826a49SYabin Cui }
165*01826a49SYabin Cui
166*01826a49SYabin Cui /* inverse of rangeMap */
invRangeMap(varInds_t param,U32 value)167*01826a49SYabin Cui static int invRangeMap(varInds_t param, U32 value)
168*01826a49SYabin Cui {
169*01826a49SYabin Cui value = MIN(MAX(mintable[param], value), maxtable[param]);
170*01826a49SYabin Cui switch(param) {
171*01826a49SYabin Cui case wlog_ind:
172*01826a49SYabin Cui case clog_ind:
173*01826a49SYabin Cui case hlog_ind:
174*01826a49SYabin Cui case slog_ind:
175*01826a49SYabin Cui case mml_ind:
176*01826a49SYabin Cui case strt_ind:
177*01826a49SYabin Cui return (int)(value - mintable[param]);
178*01826a49SYabin Cui case tlen_ind: /* bin search */
179*01826a49SYabin Cui {
180*01826a49SYabin Cui int lo = 0;
181*01826a49SYabin Cui int hi = TLEN_RANGE;
182*01826a49SYabin Cui while(lo < hi) {
183*01826a49SYabin Cui int mid = (lo + hi) / 2;
184*01826a49SYabin Cui if(tlen_table[mid] < value) {
185*01826a49SYabin Cui lo = mid + 1;
186*01826a49SYabin Cui } if(tlen_table[mid] == value) {
187*01826a49SYabin Cui return mid;
188*01826a49SYabin Cui } else {
189*01826a49SYabin Cui hi = mid;
190*01826a49SYabin Cui }
191*01826a49SYabin Cui }
192*01826a49SYabin Cui return lo;
193*01826a49SYabin Cui }
194*01826a49SYabin Cui case fadt_ind:
195*01826a49SYabin Cui return (int)value + 1;
196*01826a49SYabin Cui case NUM_PARAMS:
197*01826a49SYabin Cui default:;
198*01826a49SYabin Cui }
199*01826a49SYabin Cui DISPLAY("Error, not a valid param\n ");
200*01826a49SYabin Cui assert(0);
201*01826a49SYabin Cui return -2;
202*01826a49SYabin Cui }
203*01826a49SYabin Cui
204*01826a49SYabin Cui /* display of params */
displayParamVal(FILE * f,varInds_t param,unsigned value,int width)205*01826a49SYabin Cui static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width)
206*01826a49SYabin Cui {
207*01826a49SYabin Cui switch(param) {
208*01826a49SYabin Cui case wlog_ind:
209*01826a49SYabin Cui case clog_ind:
210*01826a49SYabin Cui case hlog_ind:
211*01826a49SYabin Cui case slog_ind:
212*01826a49SYabin Cui case mml_ind:
213*01826a49SYabin Cui case tlen_ind:
214*01826a49SYabin Cui if(width) {
215*01826a49SYabin Cui fprintf(f, "%*u", width, value);
216*01826a49SYabin Cui } else {
217*01826a49SYabin Cui fprintf(f, "%u", value);
218*01826a49SYabin Cui }
219*01826a49SYabin Cui break;
220*01826a49SYabin Cui case strt_ind:
221*01826a49SYabin Cui if(width) {
222*01826a49SYabin Cui fprintf(f, "%*s", width, g_stratName[value]);
223*01826a49SYabin Cui } else {
224*01826a49SYabin Cui fprintf(f, "%s", g_stratName[value]);
225*01826a49SYabin Cui }
226*01826a49SYabin Cui break;
227*01826a49SYabin Cui case fadt_ind: /* force attach dict */
228*01826a49SYabin Cui if(width) {
229*01826a49SYabin Cui fprintf(f, "%*d", width, (int)value);
230*01826a49SYabin Cui } else {
231*01826a49SYabin Cui fprintf(f, "%d", (int)value);
232*01826a49SYabin Cui }
233*01826a49SYabin Cui break;
234*01826a49SYabin Cui case NUM_PARAMS:
235*01826a49SYabin Cui default:
236*01826a49SYabin Cui DISPLAY("Error, not a valid param\n ");
237*01826a49SYabin Cui assert(0);
238*01826a49SYabin Cui break;
239*01826a49SYabin Cui }
240*01826a49SYabin Cui }
241*01826a49SYabin Cui
242*01826a49SYabin Cui
243*01826a49SYabin Cui /*-************************************
244*01826a49SYabin Cui * Benchmark Parameters/Global Variables
245*01826a49SYabin Cui **************************************/
246*01826a49SYabin Cui
247*01826a49SYabin Cui /* General Utility */
248*01826a49SYabin Cui static U32 g_timeLimit_s = 99999; /* about 27 hours */
249*01826a49SYabin Cui static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */
250*01826a49SYabin Cui static U32 g_blockSize = 0;
251*01826a49SYabin Cui static U32 g_rand = 1;
252*01826a49SYabin Cui
253*01826a49SYabin Cui /* Display */
254*01826a49SYabin Cui static int g_displayLevel = 3;
255*01826a49SYabin Cui static BYTE g_silenceParams[NUM_PARAMS]; /* can selectively silence some params when displaying them */
256*01826a49SYabin Cui
257*01826a49SYabin Cui /* Mode Selection */
258*01826a49SYabin Cui static U32 g_singleRun = 0;
259*01826a49SYabin Cui static U32 g_optimizer = 0;
260*01826a49SYabin Cui static int g_optmode = 0;
261*01826a49SYabin Cui
262*01826a49SYabin Cui /* For cLevel Table generation */
263*01826a49SYabin Cui static U32 g_target = 0;
264*01826a49SYabin Cui static U32 g_noSeed = 0;
265*01826a49SYabin Cui
266*01826a49SYabin Cui /* For optimizer */
267*01826a49SYabin Cui static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */
268*01826a49SYabin Cui static double g_ratioMultiplier = 5.;
269*01826a49SYabin Cui static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict */
270*01826a49SYabin Cui static BMK_benchResult_t g_lvltarget;
271*01826a49SYabin Cui
272*01826a49SYabin Cui typedef enum {
273*01826a49SYabin Cui directMap,
274*01826a49SYabin Cui xxhashMap,
275*01826a49SYabin Cui noMemo
276*01826a49SYabin Cui } memoTableType_t;
277*01826a49SYabin Cui
278*01826a49SYabin Cui typedef struct {
279*01826a49SYabin Cui memoTableType_t tableType;
280*01826a49SYabin Cui BYTE* table;
281*01826a49SYabin Cui size_t tableLen;
282*01826a49SYabin Cui varInds_t varArray[NUM_PARAMS];
283*01826a49SYabin Cui size_t varLen;
284*01826a49SYabin Cui } memoTable_t;
285*01826a49SYabin Cui
286*01826a49SYabin Cui typedef struct {
287*01826a49SYabin Cui BMK_benchResult_t result;
288*01826a49SYabin Cui paramValues_t params;
289*01826a49SYabin Cui } winnerInfo_t;
290*01826a49SYabin Cui
291*01826a49SYabin Cui typedef struct {
292*01826a49SYabin Cui U32 cSpeed; /* bytes / sec */
293*01826a49SYabin Cui U32 dSpeed;
294*01826a49SYabin Cui U32 cMem; /* bytes */
295*01826a49SYabin Cui } constraint_t;
296*01826a49SYabin Cui
297*01826a49SYabin Cui typedef struct winner_ll_node winner_ll_node;
298*01826a49SYabin Cui struct winner_ll_node {
299*01826a49SYabin Cui winnerInfo_t res;
300*01826a49SYabin Cui winner_ll_node* next;
301*01826a49SYabin Cui };
302*01826a49SYabin Cui
303*01826a49SYabin Cui static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */
304*01826a49SYabin Cui
305*01826a49SYabin Cui /*
306*01826a49SYabin Cui * Additional Global Variables (Defined Above Use)
307*01826a49SYabin Cui * g_level_constraint
308*01826a49SYabin Cui * g_alreadyTested
309*01826a49SYabin Cui * g_maxTries
310*01826a49SYabin Cui * g_clockGranularity
311*01826a49SYabin Cui */
312*01826a49SYabin Cui
313*01826a49SYabin Cui
314*01826a49SYabin Cui /*-*******************************************************
315*01826a49SYabin Cui * General Util Functions
316*01826a49SYabin Cui *********************************************************/
317*01826a49SYabin Cui
318*01826a49SYabin Cui /* nullified useless params, to ensure count stats */
319*01826a49SYabin Cui /* cleans up params for memoizing / display */
sanitizeParams(paramValues_t params)320*01826a49SYabin Cui static paramValues_t sanitizeParams(paramValues_t params)
321*01826a49SYabin Cui {
322*01826a49SYabin Cui if (params.vals[strt_ind] == ZSTD_fast)
323*01826a49SYabin Cui params.vals[clog_ind] = 0, params.vals[slog_ind] = 0;
324*01826a49SYabin Cui if (params.vals[strt_ind] == ZSTD_dfast)
325*01826a49SYabin Cui params.vals[slog_ind] = 0;
326*01826a49SYabin Cui if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) )
327*01826a49SYabin Cui params.vals[tlen_ind] = 0;
328*01826a49SYabin Cui
329*01826a49SYabin Cui return params;
330*01826a49SYabin Cui }
331*01826a49SYabin Cui
pvalsToCParams(paramValues_t p)332*01826a49SYabin Cui static ZSTD_compressionParameters pvalsToCParams(paramValues_t p)
333*01826a49SYabin Cui {
334*01826a49SYabin Cui ZSTD_compressionParameters c;
335*01826a49SYabin Cui memset(&c, 0, sizeof(ZSTD_compressionParameters));
336*01826a49SYabin Cui c.windowLog = p.vals[wlog_ind];
337*01826a49SYabin Cui c.chainLog = p.vals[clog_ind];
338*01826a49SYabin Cui c.hashLog = p.vals[hlog_ind];
339*01826a49SYabin Cui c.searchLog = p.vals[slog_ind];
340*01826a49SYabin Cui c.minMatch = p.vals[mml_ind];
341*01826a49SYabin Cui c.targetLength = p.vals[tlen_ind];
342*01826a49SYabin Cui c.strategy = p.vals[strt_ind];
343*01826a49SYabin Cui /* no forceAttachDict */
344*01826a49SYabin Cui return c;
345*01826a49SYabin Cui }
346*01826a49SYabin Cui
cParamsToPVals(ZSTD_compressionParameters c)347*01826a49SYabin Cui static paramValues_t cParamsToPVals(ZSTD_compressionParameters c)
348*01826a49SYabin Cui {
349*01826a49SYabin Cui paramValues_t p;
350*01826a49SYabin Cui varInds_t i;
351*01826a49SYabin Cui p.vals[wlog_ind] = c.windowLog;
352*01826a49SYabin Cui p.vals[clog_ind] = c.chainLog;
353*01826a49SYabin Cui p.vals[hlog_ind] = c.hashLog;
354*01826a49SYabin Cui p.vals[slog_ind] = c.searchLog;
355*01826a49SYabin Cui p.vals[mml_ind] = c.minMatch;
356*01826a49SYabin Cui p.vals[tlen_ind] = c.targetLength;
357*01826a49SYabin Cui p.vals[strt_ind] = c.strategy;
358*01826a49SYabin Cui
359*01826a49SYabin Cui /* set all other params to their minimum value */
360*01826a49SYabin Cui for (i = strt_ind + 1; i < NUM_PARAMS; i++) {
361*01826a49SYabin Cui p.vals[i] = mintable[i];
362*01826a49SYabin Cui }
363*01826a49SYabin Cui return p;
364*01826a49SYabin Cui }
365*01826a49SYabin Cui
366*01826a49SYabin Cui /* equivalent of ZSTD_adjustCParams for paramValues_t */
367*01826a49SYabin Cui static paramValues_t
adjustParams(paramValues_t p,const size_t maxBlockSize,const size_t dictSize)368*01826a49SYabin Cui adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize)
369*01826a49SYabin Cui {
370*01826a49SYabin Cui paramValues_t ot = p;
371*01826a49SYabin Cui varInds_t i;
372*01826a49SYabin Cui p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize));
373*01826a49SYabin Cui if (!dictSize) { p.vals[fadt_ind] = 0; }
374*01826a49SYabin Cui /* retain value of all other parameters */
375*01826a49SYabin Cui for(i = strt_ind + 1; i < NUM_PARAMS; i++) {
376*01826a49SYabin Cui p.vals[i] = ot.vals[i];
377*01826a49SYabin Cui }
378*01826a49SYabin Cui return p;
379*01826a49SYabin Cui }
380*01826a49SYabin Cui
/* BMK_findMaxMem() :
 * probes how much memory can be allocated around `requiredMem`.
 * The request is raised to the next multiple of 2^26 strictly above it, capped at
 * maxMemory, then allocations are attempted starting two steps higher and
 * going down one step at a time until one succeeds. The probe is freed at once.
 * @return : the last size attempted, minus one step */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    void* probe = NULL;

    requiredMem = ((requiredMem >> 26) + 1) << 26;
    if (requiredMem > maxMemory) {
        requiredMem = maxMemory;
    }

    for (requiredMem += 2 * step;
         (probe == NULL) && (requiredMem > 0);
         requiredMem -= step) {
        probe = malloc((size_t)requiredMem);
    }

    free(probe);
    return (size_t)requiredMem;
}
398*01826a49SYabin Cui
399*01826a49SYabin Cui /* accuracy in seconds only, span can be multiple years */
BMK_timeSpan_s(const UTIL_time_t tStart)400*01826a49SYabin Cui static U32 BMK_timeSpan_s(const UTIL_time_t tStart)
401*01826a49SYabin Cui {
402*01826a49SYabin Cui return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL);
403*01826a49SYabin Cui }
404*01826a49SYabin Cui
/* FUZ_rotl32() :
 * rotates `x` left by `r` bits; only the low 5 bits of `r` are used.
 * Both shift counts are masked so that r == 0 (or r >= 32) never produces
 * a shift by 32 or more, which is undefined behavior on a 32-bit operand.
 * Results for r in 1..31 are unchanged. */
static U32 FUZ_rotl32(U32 x, U32 r)
{
    U32 const left = r & 31;
    U32 const right = (32 - left) & 31;
    return ((x << left) | (x >> right));
}
409*01826a49SYabin Cui
/* FUZ_rand() :
 * advances the generator state `*src` (multiply, add, rotate left by 13)
 * @return : the new state shifted right by 5 bits */
static U32 FUZ_rand(U32* src)
{
    U32 const multiplier = 2654435761U;
    U32 const increment  = 2246822519U;
    U32 const nextState  = FUZ_rotl32((*src * multiplier) + increment, 13);

    *src = nextState;
    return nextState >> 5;
}
421*01826a49SYabin Cui
422*01826a49SYabin Cui #define BOUNDCHECK(val,min,max) { \
423*01826a49SYabin Cui if (((val)<(min)) | ((val)>(max))) { \
424*01826a49SYabin Cui DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); \
425*01826a49SYabin Cui return 0; \
426*01826a49SYabin Cui } }
427*01826a49SYabin Cui
paramValid(const paramValues_t paramTarget)428*01826a49SYabin Cui static int paramValid(const paramValues_t paramTarget)
429*01826a49SYabin Cui {
430*01826a49SYabin Cui U32 i;
431*01826a49SYabin Cui for(i = 0; i < NUM_PARAMS; i++) {
432*01826a49SYabin Cui BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]);
433*01826a49SYabin Cui }
434*01826a49SYabin Cui return 1;
435*01826a49SYabin Cui }
436*01826a49SYabin Cui
437*01826a49SYabin Cui /* cParamUnsetMin() :
438*01826a49SYabin Cui * if any parameter in paramTarget is not yet set,
439*01826a49SYabin Cui * it will receive its corresponding minimal value.
440*01826a49SYabin Cui * This function never fails */
cParamUnsetMin(paramValues_t paramTarget)441*01826a49SYabin Cui static paramValues_t cParamUnsetMin(paramValues_t paramTarget)
442*01826a49SYabin Cui {
443*01826a49SYabin Cui varInds_t vi;
444*01826a49SYabin Cui for (vi = 0; vi < NUM_PARAMS; vi++) {
445*01826a49SYabin Cui if (paramTarget.vals[vi] == PARAM_UNSET) {
446*01826a49SYabin Cui paramTarget.vals[vi] = mintable[vi];
447*01826a49SYabin Cui }
448*01826a49SYabin Cui }
449*01826a49SYabin Cui return paramTarget;
450*01826a49SYabin Cui }
451*01826a49SYabin Cui
emptyParams(void)452*01826a49SYabin Cui static paramValues_t emptyParams(void)
453*01826a49SYabin Cui {
454*01826a49SYabin Cui U32 i;
455*01826a49SYabin Cui paramValues_t p;
456*01826a49SYabin Cui for(i = 0; i < NUM_PARAMS; i++) {
457*01826a49SYabin Cui p.vals[i] = PARAM_UNSET;
458*01826a49SYabin Cui }
459*01826a49SYabin Cui return p;
460*01826a49SYabin Cui }
461*01826a49SYabin Cui
initWinnerInfo(const paramValues_t p)462*01826a49SYabin Cui static winnerInfo_t initWinnerInfo(const paramValues_t p)
463*01826a49SYabin Cui {
464*01826a49SYabin Cui winnerInfo_t w1;
465*01826a49SYabin Cui w1.result.cSpeed = 0;
466*01826a49SYabin Cui w1.result.dSpeed = 0;
467*01826a49SYabin Cui w1.result.cMem = (size_t)-1;
468*01826a49SYabin Cui w1.result.cSize = (size_t)-1;
469*01826a49SYabin Cui w1.params = p;
470*01826a49SYabin Cui return w1;
471*01826a49SYabin Cui }
472*01826a49SYabin Cui
473*01826a49SYabin Cui static paramValues_t
overwriteParams(paramValues_t base,const paramValues_t mask)474*01826a49SYabin Cui overwriteParams(paramValues_t base, const paramValues_t mask)
475*01826a49SYabin Cui {
476*01826a49SYabin Cui U32 i;
477*01826a49SYabin Cui for(i = 0; i < NUM_PARAMS; i++) {
478*01826a49SYabin Cui if(mask.vals[i] != PARAM_UNSET) {
479*01826a49SYabin Cui base.vals[i] = mask.vals[i];
480*01826a49SYabin Cui }
481*01826a49SYabin Cui }
482*01826a49SYabin Cui return base;
483*01826a49SYabin Cui }
484*01826a49SYabin Cui
485*01826a49SYabin Cui static void
paramVaryOnce(const varInds_t paramIndex,const int amt,paramValues_t * ptr)486*01826a49SYabin Cui paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr)
487*01826a49SYabin Cui {
488*01826a49SYabin Cui ptr->vals[paramIndex] = rangeMap(paramIndex,
489*01826a49SYabin Cui invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt);
490*01826a49SYabin Cui }
491*01826a49SYabin Cui
492*01826a49SYabin Cui /* varies ptr by nbChanges respecting varyParams*/
493*01826a49SYabin Cui static void
paramVariation(paramValues_t * ptr,memoTable_t * mtAll,const U32 nbChanges)494*01826a49SYabin Cui paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges)
495*01826a49SYabin Cui {
496*01826a49SYabin Cui paramValues_t p;
497*01826a49SYabin Cui int validated = 0;
498*01826a49SYabin Cui while (!validated) {
499*01826a49SYabin Cui U32 i;
500*01826a49SYabin Cui p = *ptr;
501*01826a49SYabin Cui for (i = 0 ; i < nbChanges ; i++) {
502*01826a49SYabin Cui const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1);
503*01826a49SYabin Cui paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1],
504*01826a49SYabin Cui (int)((changeID & 1) << 1) - 1,
505*01826a49SYabin Cui &p);
506*01826a49SYabin Cui }
507*01826a49SYabin Cui validated = paramValid(p);
508*01826a49SYabin Cui }
509*01826a49SYabin Cui *ptr = p;
510*01826a49SYabin Cui }
511*01826a49SYabin Cui
512*01826a49SYabin Cui /* Completely random parameter selection */
randomParams(void)513*01826a49SYabin Cui static paramValues_t randomParams(void)
514*01826a49SYabin Cui {
515*01826a49SYabin Cui varInds_t v; paramValues_t p;
516*01826a49SYabin Cui for(v = 0; v < NUM_PARAMS; v++) {
517*01826a49SYabin Cui p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v]));
518*01826a49SYabin Cui }
519*01826a49SYabin Cui return p;
520*01826a49SYabin Cui }
521*01826a49SYabin Cui
522*01826a49SYabin Cui static U64 g_clockGranularity = 100000000ULL;
523*01826a49SYabin Cui
init_clockGranularity(void)524*01826a49SYabin Cui static void init_clockGranularity(void)
525*01826a49SYabin Cui {
526*01826a49SYabin Cui UTIL_time_t const clockStart = UTIL_getTime();
527*01826a49SYabin Cui U64 el1 = 0, el2 = 0;
528*01826a49SYabin Cui int i = 0;
529*01826a49SYabin Cui do {
530*01826a49SYabin Cui el1 = el2;
531*01826a49SYabin Cui el2 = UTIL_clockSpanNano(clockStart);
532*01826a49SYabin Cui if(el1 < el2) {
533*01826a49SYabin Cui U64 iv = el2 - el1;
534*01826a49SYabin Cui if(g_clockGranularity > iv) {
535*01826a49SYabin Cui g_clockGranularity = iv;
536*01826a49SYabin Cui i = 0;
537*01826a49SYabin Cui } else {
538*01826a49SYabin Cui i++;
539*01826a49SYabin Cui }
540*01826a49SYabin Cui }
541*01826a49SYabin Cui } while(i < 10);
542*01826a49SYabin Cui DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity);
543*01826a49SYabin Cui }
544*01826a49SYabin Cui
545*01826a49SYabin Cui /*-************************************
546*01826a49SYabin Cui * Optimizer Util Functions
547*01826a49SYabin Cui **************************************/
548*01826a49SYabin Cui
549*01826a49SYabin Cui /* checks results are feasible */
feasible(const BMK_benchResult_t results,const constraint_t target)550*01826a49SYabin Cui static int feasible(const BMK_benchResult_t results, const constraint_t target) {
551*01826a49SYabin Cui return (results.cSpeed >= target.cSpeed)
552*01826a49SYabin Cui && (results.dSpeed >= target.dSpeed)
553*01826a49SYabin Cui && (results.cMem <= target.cMem)
554*01826a49SYabin Cui && (!g_optmode || results.cSize <= g_lvltarget.cSize);
555*01826a49SYabin Cui }
556*01826a49SYabin Cui
557*01826a49SYabin Cui /* hill climbing value for part 1 */
558*01826a49SYabin Cui /* Scoring here is a linear reward for all set constraints normalized between 0 and 1
559*01826a49SYabin Cui * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic
560*01826a49SYabin Cui * bonus to exceeding the constraint value. We also give linear ratio for compression ratio.
561*01826a49SYabin Cui * The constant factors are experimental.
562*01826a49SYabin Cui */
563*01826a49SYabin Cui static double
resultScore(const BMK_benchResult_t res,const size_t srcSize,const constraint_t target)564*01826a49SYabin Cui resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target)
565*01826a49SYabin Cui {
566*01826a49SYabin Cui double cs = 0., ds = 0., rt, cm = 0.;
567*01826a49SYabin Cui const double r1 = 1, r2 = 0.1, rtr = 0.5;
568*01826a49SYabin Cui double ret;
569*01826a49SYabin Cui if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; }
570*01826a49SYabin Cui if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; }
571*01826a49SYabin Cui if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; }
572*01826a49SYabin Cui rt = ((double)srcSize / (double)res.cSize);
573*01826a49SYabin Cui
574*01826a49SYabin Cui ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr +
575*01826a49SYabin Cui (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2;
576*01826a49SYabin Cui
577*01826a49SYabin Cui return ret;
578*01826a49SYabin Cui }
579*01826a49SYabin Cui
580*01826a49SYabin Cui /* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */
581*01826a49SYabin Cui static double
resultDistLvl(const BMK_benchResult_t result1,const BMK_benchResult_t lvlRes)582*01826a49SYabin Cui resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes)
583*01826a49SYabin Cui {
584*01826a49SYabin Cui double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1;
585*01826a49SYabin Cui double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1;
586*01826a49SYabin Cui if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) {
587*01826a49SYabin Cui return 0.0;
588*01826a49SYabin Cui }
589*01826a49SYabin Cui return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1;
590*01826a49SYabin Cui }
591*01826a49SYabin Cui
592*01826a49SYabin Cui /* return true if r2 strictly better than r1 */
593*01826a49SYabin Cui static int
compareResultLT(const BMK_benchResult_t result1,const BMK_benchResult_t result2,const constraint_t target,size_t srcSize)594*01826a49SYabin Cui compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const constraint_t target, size_t srcSize)
595*01826a49SYabin Cui {
596*01826a49SYabin Cui if(feasible(result1, target) && feasible(result2, target)) {
597*01826a49SYabin Cui if(g_optmode) {
598*01826a49SYabin Cui return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget);
599*01826a49SYabin Cui } else {
600*01826a49SYabin Cui return (result1.cSize > result2.cSize)
601*01826a49SYabin Cui || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
602*01826a49SYabin Cui || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
603*01826a49SYabin Cui }
604*01826a49SYabin Cui }
605*01826a49SYabin Cui return feasible(result2, target)
606*01826a49SYabin Cui || (!feasible(result1, target)
607*01826a49SYabin Cui && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
608*01826a49SYabin Cui }
609*01826a49SYabin Cui
/* relaxTarget() :
 * @return : a loosened copy of `target` : the memory constraint is removed
 *           ((U32)-1), and both speed constraints are scaled to g_strictness percent.
 * The product is computed on 64 bits : speeds are expressed in bytes / sec,
 * so a 32-bit `speed * g_strictness` wraps around above ~43 MB/s at strictness 100. */
static constraint_t relaxTarget(constraint_t target) {
    target.cMem = (U32)-1;
    target.cSpeed = (U32)(((U64)target.cSpeed * g_strictness) / 100);
    target.dSpeed = (U32)(((U64)target.dSpeed * g_strictness) / 100);
    return target;
}
616*01826a49SYabin Cui
/* optimizerAdjustInput() :
 * Sanitizes the user-provided parameters in `pc`, in place.
 * Entries equal to PARAM_UNSET are never modified.
 * Each correction is reported through DISPLAY().
 * Steps, in order :
 *  1. clamp every set parameter into [mintable[v], maxtable[v]]
 *  2. shrink windowLog when it is far larger than needed for maxBlockSize
 *  3. cap chainLog relative to windowLog (+1 allowed for strategies >= ZSTD_btlazy2, or unset strategy)
 *  4. cap hashLog to windowLog + 1
 *  5. raise chainLog up to searchLog when searchLog is larger */
static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize)
{
    varInds_t v;

    /* 1. range clamp */
    for (v = 0; v < NUM_PARAMS; v++) {
        U32 clamped;
        if (pc->vals[v] == PARAM_UNSET) { continue; }
        clamped = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]);
        if (clamped == pc->vals[v]) { continue; }
        pc->vals[v] = clamped;
        DISPLAY("Warning: parameter %s not in valid range, adjusting to ",
                g_paramNames[v]);
        displayParamVal(stderr, v, clamped, 0);
        DISPLAY("\n");
    }

    /* 2. windowLog vs block size */
    if (pc->vals[wlog_ind] != PARAM_UNSET) {
        /* number of bits needed to address maxBlockSize bytes ;
         * sizes <= 1 are special-cased since highbit is not usable on 0 */
        U32 const sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1;
        int const fitsIn31Bits = (maxBlockSize < (1ULL << 31));

        if (fitsIn31Bits && sshb + 1 < pc->vals[wlog_ind]) {
            U32 const wanted = MAX(mintable[wlog_ind], sshb);
            if (wanted != pc->vals[wlog_ind]) {
                pc->vals[wlog_ind] = wanted;
                DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n",
                        (unsigned)pc->vals[wlog_ind]);
            }
        }
    }

    /* 3. chainLog vs windowLog */
    if (pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
        int const extraBit = (pc->vals[strt_ind] == PARAM_UNSET)
                          || (pc->vals[strt_ind] >= (U32)ZSTD_btlazy2);
        U32 const maxclog = extraBit ? pc->vals[wlog_ind] + 1 : pc->vals[wlog_ind];

        if (pc->vals[clog_ind] > maxclog) {
            pc->vals[clog_ind] = maxclog;
            DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n",
                    (unsigned)pc->vals[clog_ind]);
        }
    }

    /* 4. hashLog vs windowLog */
    if (pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) {
        U32 const maxhlog = pc->vals[wlog_ind] + 1;
        if (maxhlog < pc->vals[hlog_ind]) {
            pc->vals[hlog_ind] = maxhlog;
            DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n",
                    (unsigned)pc->vals[hlog_ind]);
        }
    }

    /* 5. searchLog vs chainLog : chainLog is the one being raised */
    if (pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
        if (pc->vals[slog_ind] > pc->vals[clog_ind]) {
            pc->vals[clog_ind] = pc->vals[slog_ind];
            DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n",
                    (unsigned)pc->vals[slog_ind]);
        }
    }
}
678*01826a49SYabin Cui
679*01826a49SYabin Cui static int
redundantParams(const paramValues_t paramValues,const constraint_t target,const size_t maxBlockSize)680*01826a49SYabin Cui redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize)
681*01826a49SYabin Cui {
682*01826a49SYabin Cui return
683*01826a49SYabin Cui (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */
684*01826a49SYabin Cui || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */
685*01826a49SYabin Cui || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/
686*01826a49SYabin Cui || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */
687*01826a49SYabin Cui || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */
688*01826a49SYabin Cui }
689*01826a49SYabin Cui
690*01826a49SYabin Cui
691*01826a49SYabin Cui /*-************************************
692*01826a49SYabin Cui * Display Functions
693*01826a49SYabin Cui **************************************/
694*01826a49SYabin Cui
695*01826a49SYabin Cui /* BMK_paramValues_into_commandLine() :
696*01826a49SYabin Cui * transform a set of parameters paramValues_t
697*01826a49SYabin Cui * into a command line compatible with `zstd` syntax
698*01826a49SYabin Cui * and writes it into FILE* f.
699*01826a49SYabin Cui * f must be already opened and writable */
700*01826a49SYabin Cui static void
BMK_paramValues_into_commandLine(FILE * f,const paramValues_t params)701*01826a49SYabin Cui BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params)
702*01826a49SYabin Cui {
703*01826a49SYabin Cui varInds_t v;
704*01826a49SYabin Cui int first = 1;
705*01826a49SYabin Cui fprintf(f,"--zstd=");
706*01826a49SYabin Cui for (v = 0; v < NUM_PARAMS; v++) {
707*01826a49SYabin Cui if (g_silenceParams[v]) { continue; }
708*01826a49SYabin Cui if (!first) { fprintf(f, ","); }
709*01826a49SYabin Cui fprintf(f,"%s=", g_paramNames[v]);
710*01826a49SYabin Cui
711*01826a49SYabin Cui if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); }
712*01826a49SYabin Cui else { displayParamVal(f, v, params.vals[v], 0); }
713*01826a49SYabin Cui first = 0;
714*01826a49SYabin Cui }
715*01826a49SYabin Cui fprintf(f, "\n");
716*01826a49SYabin Cui }
717*01826a49SYabin Cui
718*01826a49SYabin Cui
719*01826a49SYabin Cui /* comparison function: */
720*01826a49SYabin Cui /* strictly better, strictly worse, equal, speed-side adv, size-side adv */
721*01826a49SYabin Cui #define WORSE_RESULT 0
722*01826a49SYabin Cui #define BETTER_RESULT 1
723*01826a49SYabin Cui #define ERROR_RESULT 2
724*01826a49SYabin Cui
725*01826a49SYabin Cui #define SPEED_RESULT 4
726*01826a49SYabin Cui #define SIZE_RESULT 5
727*01826a49SYabin Cui /* maybe have epsilon-eq to limit table size? */
728*01826a49SYabin Cui static int
speedSizeCompare(const BMK_benchResult_t r1,const BMK_benchResult_t r2)729*01826a49SYabin Cui speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2)
730*01826a49SYabin Cui {
731*01826a49SYabin Cui if(r1.cSpeed < r2.cSpeed) {
732*01826a49SYabin Cui if(r1.cSize >= r2.cSize) {
733*01826a49SYabin Cui return BETTER_RESULT;
734*01826a49SYabin Cui }
735*01826a49SYabin Cui return SPEED_RESULT; /* r2 is smaller but not faster. */
736*01826a49SYabin Cui } else {
737*01826a49SYabin Cui if(r1.cSize <= r2.cSize) {
738*01826a49SYabin Cui return WORSE_RESULT;
739*01826a49SYabin Cui }
740*01826a49SYabin Cui return SIZE_RESULT; /* r2 is faster but not smaller */
741*01826a49SYabin Cui }
742*01826a49SYabin Cui }
743*01826a49SYabin Cui
744*01826a49SYabin Cui /* 0 for insertion, 1 for no insert */
745*01826a49SYabin Cui /* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */
746*01826a49SYabin Cui static int
insertWinner(const winnerInfo_t w,const constraint_t targetConstraints)747*01826a49SYabin Cui insertWinner(const winnerInfo_t w, const constraint_t targetConstraints)
748*01826a49SYabin Cui {
749*01826a49SYabin Cui BMK_benchResult_t r = w.result;
750*01826a49SYabin Cui winner_ll_node* cur_node = g_winners;
751*01826a49SYabin Cui /* first node to insert */
752*01826a49SYabin Cui if(!feasible(r, targetConstraints)) {
753*01826a49SYabin Cui return 1;
754*01826a49SYabin Cui }
755*01826a49SYabin Cui
756*01826a49SYabin Cui if(g_winners == NULL) {
757*01826a49SYabin Cui winner_ll_node* first_node = malloc(sizeof(winner_ll_node));
758*01826a49SYabin Cui if(first_node == NULL) {
759*01826a49SYabin Cui return 1;
760*01826a49SYabin Cui }
761*01826a49SYabin Cui first_node->next = NULL;
762*01826a49SYabin Cui first_node->res = w;
763*01826a49SYabin Cui g_winners = first_node;
764*01826a49SYabin Cui return 0;
765*01826a49SYabin Cui }
766*01826a49SYabin Cui
767*01826a49SYabin Cui while(cur_node->next != NULL) {
768*01826a49SYabin Cui switch(speedSizeCompare(cur_node->res.result, r)) {
769*01826a49SYabin Cui case WORSE_RESULT:
770*01826a49SYabin Cui {
771*01826a49SYabin Cui return 1; /* never insert if better */
772*01826a49SYabin Cui }
773*01826a49SYabin Cui case BETTER_RESULT:
774*01826a49SYabin Cui {
775*01826a49SYabin Cui winner_ll_node* tmp;
776*01826a49SYabin Cui cur_node->res = cur_node->next->res;
777*01826a49SYabin Cui tmp = cur_node->next;
778*01826a49SYabin Cui cur_node->next = cur_node->next->next;
779*01826a49SYabin Cui free(tmp);
780*01826a49SYabin Cui break;
781*01826a49SYabin Cui }
782*01826a49SYabin Cui case SIZE_RESULT:
783*01826a49SYabin Cui {
784*01826a49SYabin Cui cur_node = cur_node->next;
785*01826a49SYabin Cui break;
786*01826a49SYabin Cui }
787*01826a49SYabin Cui case SPEED_RESULT: /* insert after first size result, then return */
788*01826a49SYabin Cui {
789*01826a49SYabin Cui winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
790*01826a49SYabin Cui if(newnode == NULL) {
791*01826a49SYabin Cui return 1;
792*01826a49SYabin Cui }
793*01826a49SYabin Cui newnode->res = cur_node->res;
794*01826a49SYabin Cui cur_node->res = w;
795*01826a49SYabin Cui newnode->next = cur_node->next;
796*01826a49SYabin Cui cur_node->next = newnode;
797*01826a49SYabin Cui return 0;
798*01826a49SYabin Cui }
799*01826a49SYabin Cui }
800*01826a49SYabin Cui
801*01826a49SYabin Cui }
802*01826a49SYabin Cui
803*01826a49SYabin Cui assert(cur_node->next == NULL);
804*01826a49SYabin Cui switch(speedSizeCompare(cur_node->res.result, r)) {
805*01826a49SYabin Cui case WORSE_RESULT:
806*01826a49SYabin Cui {
807*01826a49SYabin Cui return 1; /* never insert if better */
808*01826a49SYabin Cui }
809*01826a49SYabin Cui case BETTER_RESULT:
810*01826a49SYabin Cui {
811*01826a49SYabin Cui cur_node->res = w;
812*01826a49SYabin Cui return 0;
813*01826a49SYabin Cui }
814*01826a49SYabin Cui case SIZE_RESULT:
815*01826a49SYabin Cui {
816*01826a49SYabin Cui winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
817*01826a49SYabin Cui if(newnode == NULL) {
818*01826a49SYabin Cui return 1;
819*01826a49SYabin Cui }
820*01826a49SYabin Cui newnode->res = w;
821*01826a49SYabin Cui newnode->next = NULL;
822*01826a49SYabin Cui cur_node->next = newnode;
823*01826a49SYabin Cui return 0;
824*01826a49SYabin Cui }
825*01826a49SYabin Cui case SPEED_RESULT: /* insert before first size result, then return */
826*01826a49SYabin Cui {
827*01826a49SYabin Cui winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
828*01826a49SYabin Cui if(newnode == NULL) {
829*01826a49SYabin Cui return 1;
830*01826a49SYabin Cui }
831*01826a49SYabin Cui newnode->res = cur_node->res;
832*01826a49SYabin Cui cur_node->res = w;
833*01826a49SYabin Cui newnode->next = cur_node->next;
834*01826a49SYabin Cui cur_node->next = newnode;
835*01826a49SYabin Cui return 0;
836*01826a49SYabin Cui }
837*01826a49SYabin Cui default:
838*01826a49SYabin Cui return 1;
839*01826a49SYabin Cui }
840*01826a49SYabin Cui }
841*01826a49SYabin Cui
842*01826a49SYabin Cui static void
BMK_displayOneResult(FILE * f,winnerInfo_t res,const size_t srcSize)843*01826a49SYabin Cui BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize)
844*01826a49SYabin Cui {
845*01826a49SYabin Cui varInds_t v;
846*01826a49SYabin Cui int first = 1;
847*01826a49SYabin Cui res.params = cParamUnsetMin(res.params);
848*01826a49SYabin Cui fprintf(f, " {");
849*01826a49SYabin Cui for (v = 0; v < NUM_PARAMS; v++) {
850*01826a49SYabin Cui if (g_silenceParams[v]) { continue; }
851*01826a49SYabin Cui if (!first) { fprintf(f, ","); }
852*01826a49SYabin Cui displayParamVal(f, v, res.params.vals[v], 3);
853*01826a49SYabin Cui first = 0;
854*01826a49SYabin Cui }
855*01826a49SYabin Cui
856*01826a49SYabin Cui { double const ratio = res.result.cSize ?
857*01826a49SYabin Cui (double)srcSize / (double)res.result.cSize : 0;
858*01826a49SYabin Cui double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT;
859*01826a49SYabin Cui double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT;
860*01826a49SYabin Cui
861*01826a49SYabin Cui fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
862*01826a49SYabin Cui ratio, cSpeedMBps, dSpeedMBps);
863*01826a49SYabin Cui }
864*01826a49SYabin Cui }
865*01826a49SYabin Cui
866*01826a49SYabin Cui /* Writes to f the results of a parameter benchmark */
867*01826a49SYabin Cui /* when used with --optimize, will only print results better than previously discovered */
868*01826a49SYabin Cui static void
BMK_printWinner(FILE * f,const int cLevel,const BMK_benchResult_t result,const paramValues_t params,const size_t srcSize)869*01826a49SYabin Cui BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize)
870*01826a49SYabin Cui {
871*01826a49SYabin Cui char lvlstr[15] = "Custom Level";
872*01826a49SYabin Cui winnerInfo_t w;
873*01826a49SYabin Cui w.params = params;
874*01826a49SYabin Cui w.result = result;
875*01826a49SYabin Cui
876*01826a49SYabin Cui fprintf(f, "\r%79s\r", "");
877*01826a49SYabin Cui
878*01826a49SYabin Cui if(cLevel != CUSTOM_LEVEL) {
879*01826a49SYabin Cui snprintf(lvlstr, 15, " Level %2d ", cLevel);
880*01826a49SYabin Cui }
881*01826a49SYabin Cui
882*01826a49SYabin Cui if(TIMED) {
883*01826a49SYabin Cui const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC;
884*01826a49SYabin Cui const U64 time_ns = UTIL_clockSpanNano(g_time);
885*01826a49SYabin Cui const U64 minutes = time_ns / mn_in_ns;
886*01826a49SYabin Cui fprintf(f, "%1lu:%2lu:%05.2f - ",
887*01826a49SYabin Cui (unsigned long) minutes / 60,
888*01826a49SYabin Cui (unsigned long) minutes % 60,
889*01826a49SYabin Cui (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC );
890*01826a49SYabin Cui }
891*01826a49SYabin Cui
892*01826a49SYabin Cui fprintf(f, "/* %s */ ", lvlstr);
893*01826a49SYabin Cui BMK_displayOneResult(f, w, srcSize);
894*01826a49SYabin Cui }
895*01826a49SYabin Cui
896*01826a49SYabin Cui static void
BMK_printWinnerOpt(FILE * f,const U32 cLevel,const BMK_benchResult_t result,const paramValues_t params,const constraint_t targetConstraints,const size_t srcSize)897*01826a49SYabin Cui BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize)
898*01826a49SYabin Cui {
899*01826a49SYabin Cui /* global winner used for constraints */
900*01826a49SYabin Cui /* cSize, cSpeed, dSpeed, cMem */
901*01826a49SYabin Cui static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL },
902*01826a49SYabin Cui { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } }
903*01826a49SYabin Cui };
904*01826a49SYabin Cui if ( DEBUG
905*01826a49SYabin Cui || compareResultLT(g_winner.result, result, targetConstraints, srcSize)
906*01826a49SYabin Cui || g_displayLevel >= 4) {
907*01826a49SYabin Cui if ( DEBUG
908*01826a49SYabin Cui && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
909*01826a49SYabin Cui DISPLAY("New Winner: \n");
910*01826a49SYabin Cui }
911*01826a49SYabin Cui
912*01826a49SYabin Cui if(g_displayLevel >= 2) {
913*01826a49SYabin Cui BMK_printWinner(f, cLevel, result, params, srcSize);
914*01826a49SYabin Cui }
915*01826a49SYabin Cui
916*01826a49SYabin Cui if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
917*01826a49SYabin Cui if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); }
918*01826a49SYabin Cui g_winner.result = result;
919*01826a49SYabin Cui g_winner.params = params;
920*01826a49SYabin Cui }
921*01826a49SYabin Cui }
922*01826a49SYabin Cui
923*01826a49SYabin Cui if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) {
924*01826a49SYabin Cui winnerInfo_t w;
925*01826a49SYabin Cui winner_ll_node* n;
926*01826a49SYabin Cui w.result = result;
927*01826a49SYabin Cui w.params = params;
928*01826a49SYabin Cui insertWinner(w, targetConstraints);
929*01826a49SYabin Cui
930*01826a49SYabin Cui if(!DEBUG) { fprintf(f, "\033c"); }
931*01826a49SYabin Cui fprintf(f, "\n");
932*01826a49SYabin Cui
933*01826a49SYabin Cui /* the table */
934*01826a49SYabin Cui fprintf(f, "================================\n");
935*01826a49SYabin Cui for(n = g_winners; n != NULL; n = n->next) {
936*01826a49SYabin Cui BMK_displayOneResult(f, n->res, srcSize);
937*01826a49SYabin Cui }
938*01826a49SYabin Cui fprintf(f, "================================\n");
939*01826a49SYabin Cui fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
940*01826a49SYabin Cui (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
941*01826a49SYabin Cui
942*01826a49SYabin Cui
943*01826a49SYabin Cui fprintf(f, "Overall Winner: \n");
944*01826a49SYabin Cui BMK_displayOneResult(f, g_winner, srcSize);
945*01826a49SYabin Cui BMK_paramValues_into_commandLine(f, g_winner.params);
946*01826a49SYabin Cui
947*01826a49SYabin Cui fprintf(f, "Latest BMK: \n");\
948*01826a49SYabin Cui BMK_displayOneResult(f, w, srcSize);
949*01826a49SYabin Cui }
950*01826a49SYabin Cui }
951*01826a49SYabin Cui
952*01826a49SYabin Cui
953*01826a49SYabin Cui /* BMK_print_cLevelEntry() :
954*01826a49SYabin Cui * Writes one cLevelTable entry, for one level.
955*01826a49SYabin Cui * f must exist, be already opened, and be seekable.
956*01826a49SYabin Cui * this function cannot error.
957*01826a49SYabin Cui */
958*01826a49SYabin Cui static void
BMK_print_cLevelEntry(FILE * f,const int cLevel,paramValues_t params,const BMK_benchResult_t result,const size_t srcSize)959*01826a49SYabin Cui BMK_print_cLevelEntry(FILE* f, const int cLevel,
960*01826a49SYabin Cui paramValues_t params,
961*01826a49SYabin Cui const BMK_benchResult_t result, const size_t srcSize)
962*01826a49SYabin Cui {
963*01826a49SYabin Cui varInds_t v;
964*01826a49SYabin Cui int first = 1;
965*01826a49SYabin Cui
966*01826a49SYabin Cui assert(cLevel >= 0);
967*01826a49SYabin Cui assert(cLevel <= NB_LEVELS_TRACKED);
968*01826a49SYabin Cui params = cParamUnsetMin(params);
969*01826a49SYabin Cui
970*01826a49SYabin Cui fprintf(f, " {");
971*01826a49SYabin Cui /* print cParams.
972*01826a49SYabin Cui * assumption : all cParams are present and in order in the following range */
973*01826a49SYabin Cui for (v = 0; v <= strt_ind; v++) {
974*01826a49SYabin Cui if (!first) { fprintf(f, ","); }
975*01826a49SYabin Cui displayParamVal(f, v, params.vals[v], 3);
976*01826a49SYabin Cui first = 0;
977*01826a49SYabin Cui }
978*01826a49SYabin Cui /* print comment */
979*01826a49SYabin Cui { double const ratio = result.cSize ?
980*01826a49SYabin Cui (double)srcSize / (double)result.cSize : 0;
981*01826a49SYabin Cui double const cSpeedMBps = (double)result.cSpeed / MB_UNIT;
982*01826a49SYabin Cui double const dSpeedMBps = (double)result.dSpeed / MB_UNIT;
983*01826a49SYabin Cui
984*01826a49SYabin Cui fprintf(f, " }, /* level %2i: R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
985*01826a49SYabin Cui cLevel, ratio, cSpeedMBps, dSpeedMBps);
986*01826a49SYabin Cui }
987*01826a49SYabin Cui }
988*01826a49SYabin Cui
989*01826a49SYabin Cui
990*01826a49SYabin Cui /* BMK_print_cLevelTable() :
991*01826a49SYabin Cui * print candidate compression table into proposed FILE* f.
992*01826a49SYabin Cui * f must exist, be already opened, and be seekable.
993*01826a49SYabin Cui * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
994*01826a49SYabin Cui * this function cannot error.
995*01826a49SYabin Cui */
996*01826a49SYabin Cui static void
BMK_print_cLevelTable(FILE * f,const winnerInfo_t * winners,const size_t srcSize)997*01826a49SYabin Cui BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize)
998*01826a49SYabin Cui {
999*01826a49SYabin Cui int cLevel;
1000*01826a49SYabin Cui
1001*01826a49SYabin Cui fprintf(f, "\n /* Proposed configurations : */ \n");
1002*01826a49SYabin Cui fprintf(f, " /* W, C, H, S, L, T, strat */ \n");
1003*01826a49SYabin Cui
1004*01826a49SYabin Cui for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++)
1005*01826a49SYabin Cui BMK_print_cLevelEntry(f,
1006*01826a49SYabin Cui cLevel, winners[cLevel].params,
1007*01826a49SYabin Cui winners[cLevel].result, srcSize);
1008*01826a49SYabin Cui }
1009*01826a49SYabin Cui
1010*01826a49SYabin Cui
1011*01826a49SYabin Cui /* BMK_saveAndPrint_cLevelTable() :
1012*01826a49SYabin Cui * save candidate compression table into FILE* f,
1013*01826a49SYabin Cui * and then to stdout.
1014*01826a49SYabin Cui * f must exist, be already opened, and be seekable.
1015*01826a49SYabin Cui * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
1016*01826a49SYabin Cui * this function cannot error.
1017*01826a49SYabin Cui */
1018*01826a49SYabin Cui static void
BMK_saveAndPrint_cLevelTable(FILE * const f,const winnerInfo_t * winners,const size_t srcSize)1019*01826a49SYabin Cui BMK_saveAndPrint_cLevelTable(FILE* const f,
1020*01826a49SYabin Cui const winnerInfo_t* winners,
1021*01826a49SYabin Cui const size_t srcSize)
1022*01826a49SYabin Cui {
1023*01826a49SYabin Cui fseek(f, 0, SEEK_SET);
1024*01826a49SYabin Cui BMK_print_cLevelTable(f, winners, srcSize);
1025*01826a49SYabin Cui fflush(f);
1026*01826a49SYabin Cui BMK_print_cLevelTable(stdout, winners, srcSize);
1027*01826a49SYabin Cui }
1028*01826a49SYabin Cui
1029*01826a49SYabin Cui
1030*01826a49SYabin Cui /*-*******************************************************
1031*01826a49SYabin Cui * Functions to Benchmark
1032*01826a49SYabin Cui *********************************************************/
1033*01826a49SYabin Cui
1034*01826a49SYabin Cui typedef struct {
1035*01826a49SYabin Cui ZSTD_CCtx* cctx;
1036*01826a49SYabin Cui const void* dictBuffer;
1037*01826a49SYabin Cui size_t dictBufferSize;
1038*01826a49SYabin Cui int cLevel;
1039*01826a49SYabin Cui const paramValues_t* comprParams;
1040*01826a49SYabin Cui } BMK_initCCtxArgs;
1041*01826a49SYabin Cui
local_initCCtx(void * payload)1042*01826a49SYabin Cui static size_t local_initCCtx(void* payload) {
1043*01826a49SYabin Cui const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload;
1044*01826a49SYabin Cui varInds_t i;
1045*01826a49SYabin Cui ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters);
1046*01826a49SYabin Cui ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel);
1047*01826a49SYabin Cui
1048*01826a49SYabin Cui for(i = 0; i < NUM_PARAMS; i++) {
1049*01826a49SYabin Cui if(ag->comprParams->vals[i] != PARAM_UNSET)
1050*01826a49SYabin Cui ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]);
1051*01826a49SYabin Cui }
1052*01826a49SYabin Cui ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize);
1053*01826a49SYabin Cui
1054*01826a49SYabin Cui return 0;
1055*01826a49SYabin Cui }
1056*01826a49SYabin Cui
1057*01826a49SYabin Cui typedef struct {
1058*01826a49SYabin Cui ZSTD_DCtx* dctx;
1059*01826a49SYabin Cui const void* dictBuffer;
1060*01826a49SYabin Cui size_t dictBufferSize;
1061*01826a49SYabin Cui } BMK_initDCtxArgs;
1062*01826a49SYabin Cui
local_initDCtx(void * payload)1063*01826a49SYabin Cui static size_t local_initDCtx(void* payload) {
1064*01826a49SYabin Cui const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload;
1065*01826a49SYabin Cui ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters);
1066*01826a49SYabin Cui ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
1067*01826a49SYabin Cui return 0;
1068*01826a49SYabin Cui }
1069*01826a49SYabin Cui
1070*01826a49SYabin Cui /* additional argument is just the context */
local_defaultCompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstSize,void * addArgs)1071*01826a49SYabin Cui static size_t local_defaultCompress(
1072*01826a49SYabin Cui const void* srcBuffer, size_t srcSize,
1073*01826a49SYabin Cui void* dstBuffer, size_t dstSize,
1074*01826a49SYabin Cui void* addArgs)
1075*01826a49SYabin Cui {
1076*01826a49SYabin Cui ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs;
1077*01826a49SYabin Cui assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */
1078*01826a49SYabin Cui return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
1079*01826a49SYabin Cui }
1080*01826a49SYabin Cui
1081*01826a49SYabin Cui /* additional argument is just the context */
local_defaultDecompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstSize,void * addArgs)1082*01826a49SYabin Cui static size_t local_defaultDecompress(
1083*01826a49SYabin Cui const void* srcBuffer, size_t srcSize,
1084*01826a49SYabin Cui void* dstBuffer, size_t dstSize,
1085*01826a49SYabin Cui void* addArgs) {
1086*01826a49SYabin Cui size_t moreToFlush = 1;
1087*01826a49SYabin Cui ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs;
1088*01826a49SYabin Cui ZSTD_inBuffer in;
1089*01826a49SYabin Cui ZSTD_outBuffer out;
1090*01826a49SYabin Cui in.src = srcBuffer;
1091*01826a49SYabin Cui in.size = srcSize;
1092*01826a49SYabin Cui in.pos = 0;
1093*01826a49SYabin Cui out.dst = dstBuffer;
1094*01826a49SYabin Cui out.size = dstSize;
1095*01826a49SYabin Cui out.pos = 0;
1096*01826a49SYabin Cui while (moreToFlush) {
1097*01826a49SYabin Cui if(out.pos == out.size) {
1098*01826a49SYabin Cui return (size_t)-ZSTD_error_dstSize_tooSmall;
1099*01826a49SYabin Cui }
1100*01826a49SYabin Cui moreToFlush = ZSTD_decompressStream(dctx,
1101*01826a49SYabin Cui &out, &in);
1102*01826a49SYabin Cui if (ZSTD_isError(moreToFlush)) {
1103*01826a49SYabin Cui return moreToFlush;
1104*01826a49SYabin Cui }
1105*01826a49SYabin Cui }
1106*01826a49SYabin Cui return out.pos;
1107*01826a49SYabin Cui
1108*01826a49SYabin Cui }
1109*01826a49SYabin Cui
1110*01826a49SYabin Cui /*-************************************
1111*01826a49SYabin Cui * Data Initialization Functions
1112*01826a49SYabin Cui **************************************/
1113*01826a49SYabin Cui
/* buffers_t :
 * source data cut into blocks, plus matching destination and result areas.
 * All per-block tables are indexed by block number. */
typedef struct {
    void* srcBuffer;          /* whole source content */
    size_t srcSize;           /* total size of source content */
    const void** srcPtrs;     /* start of each block, inside srcBuffer */
    size_t* srcSizes;         /* size of each block */
    void** dstPtrs;           /* compressed output area of each block ; [0] is the start of the single underlying allocation */
    size_t* dstCapacities;    /* capacity of each output area */
    size_t* dstSizes;         /* per-block size, initialized to the capacity */
    void** resPtrs;           /* result area of each block ; [0] is the start of the single underlying allocation */
    size_t* resSizes;         /* size of each result area */
    size_t nbBlocks;          /* nb of valid entries in the tables above */
    size_t maxBlockSize;      /* largest value within srcSizes */
} buffers_t;
1127*01826a49SYabin Cui
1128*01826a49SYabin Cui typedef struct {
1129*01826a49SYabin Cui size_t dictSize;
1130*01826a49SYabin Cui void* dictBuffer;
1131*01826a49SYabin Cui ZSTD_CCtx* cctx;
1132*01826a49SYabin Cui ZSTD_DCtx* dctx;
1133*01826a49SYabin Cui } contexts_t;
1134*01826a49SYabin Cui
freeNonSrcBuffers(const buffers_t b)1135*01826a49SYabin Cui static void freeNonSrcBuffers(const buffers_t b) {
1136*01826a49SYabin Cui free((void*)b.srcPtrs);
1137*01826a49SYabin Cui free(b.srcSizes);
1138*01826a49SYabin Cui
1139*01826a49SYabin Cui if(b.dstPtrs != NULL) {
1140*01826a49SYabin Cui free(b.dstPtrs[0]);
1141*01826a49SYabin Cui }
1142*01826a49SYabin Cui free(b.dstPtrs);
1143*01826a49SYabin Cui free(b.dstCapacities);
1144*01826a49SYabin Cui free(b.dstSizes);
1145*01826a49SYabin Cui
1146*01826a49SYabin Cui if(b.resPtrs != NULL) {
1147*01826a49SYabin Cui free(b.resPtrs[0]);
1148*01826a49SYabin Cui }
1149*01826a49SYabin Cui free(b.resPtrs);
1150*01826a49SYabin Cui free(b.resSizes);
1151*01826a49SYabin Cui }
1152*01826a49SYabin Cui
freeBuffers(const buffers_t b)1153*01826a49SYabin Cui static void freeBuffers(const buffers_t b) {
1154*01826a49SYabin Cui if(b.srcPtrs != NULL) {
1155*01826a49SYabin Cui free(b.srcBuffer);
1156*01826a49SYabin Cui }
1157*01826a49SYabin Cui freeNonSrcBuffers(b);
1158*01826a49SYabin Cui }
1159*01826a49SYabin Cui
1160*01826a49SYabin Cui /* srcBuffer will be freed by freeBuffers now */
createBuffersFromMemory(buffers_t * buff,void * srcBuffer,const size_t nbFiles,const size_t * fileSizes)1161*01826a49SYabin Cui static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles,
1162*01826a49SYabin Cui const size_t* fileSizes)
1163*01826a49SYabin Cui {
1164*01826a49SYabin Cui size_t pos = 0, n, blockSize;
1165*01826a49SYabin Cui U32 maxNbBlocks, blockNb = 0;
1166*01826a49SYabin Cui buff->srcSize = 0;
1167*01826a49SYabin Cui for(n = 0; n < nbFiles; n++) {
1168*01826a49SYabin Cui buff->srcSize += fileSizes[n];
1169*01826a49SYabin Cui }
1170*01826a49SYabin Cui
1171*01826a49SYabin Cui if(buff->srcSize == 0) {
1172*01826a49SYabin Cui DISPLAY("No data to bench\n");
1173*01826a49SYabin Cui return 1;
1174*01826a49SYabin Cui }
1175*01826a49SYabin Cui
1176*01826a49SYabin Cui blockSize = g_blockSize ? g_blockSize : buff->srcSize;
1177*01826a49SYabin Cui maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles;
1178*01826a49SYabin Cui
1179*01826a49SYabin Cui buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*));
1180*01826a49SYabin Cui buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1181*01826a49SYabin Cui
1182*01826a49SYabin Cui buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1183*01826a49SYabin Cui buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1184*01826a49SYabin Cui buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1185*01826a49SYabin Cui
1186*01826a49SYabin Cui buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1187*01826a49SYabin Cui buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1188*01826a49SYabin Cui
1189*01826a49SYabin Cui if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) {
1190*01826a49SYabin Cui DISPLAY("alloc error\n");
1191*01826a49SYabin Cui freeNonSrcBuffers(*buff);
1192*01826a49SYabin Cui return 1;
1193*01826a49SYabin Cui }
1194*01826a49SYabin Cui
1195*01826a49SYabin Cui buff->srcBuffer = srcBuffer;
1196*01826a49SYabin Cui buff->srcPtrs[0] = (const void*)buff->srcBuffer;
1197*01826a49SYabin Cui buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024));
1198*01826a49SYabin Cui buff->resPtrs[0] = malloc(buff->srcSize);
1199*01826a49SYabin Cui
1200*01826a49SYabin Cui if(!buff->dstPtrs[0] || !buff->resPtrs[0]) {
1201*01826a49SYabin Cui DISPLAY("alloc error\n");
1202*01826a49SYabin Cui freeNonSrcBuffers(*buff);
1203*01826a49SYabin Cui return 1;
1204*01826a49SYabin Cui }
1205*01826a49SYabin Cui
1206*01826a49SYabin Cui for(n = 0; n < nbFiles; n++) {
1207*01826a49SYabin Cui size_t pos_end = pos + fileSizes[n];
1208*01826a49SYabin Cui for(; pos < pos_end; blockNb++) {
1209*01826a49SYabin Cui buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos);
1210*01826a49SYabin Cui buff->srcSizes[blockNb] = blockSize;
1211*01826a49SYabin Cui pos += blockSize;
1212*01826a49SYabin Cui }
1213*01826a49SYabin Cui
1214*01826a49SYabin Cui if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; }
1215*01826a49SYabin Cui pos = pos_end;
1216*01826a49SYabin Cui }
1217*01826a49SYabin Cui
1218*01826a49SYabin Cui buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]);
1219*01826a49SYabin Cui buff->dstSizes[0] = buff->dstCapacities[0];
1220*01826a49SYabin Cui buff->resSizes[0] = buff->srcSizes[0];
1221*01826a49SYabin Cui buff->maxBlockSize = buff->srcSizes[0];
1222*01826a49SYabin Cui
1223*01826a49SYabin Cui for(n = 1; n < blockNb; n++) {
1224*01826a49SYabin Cui buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1];
1225*01826a49SYabin Cui buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1];
1226*01826a49SYabin Cui buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]);
1227*01826a49SYabin Cui buff->dstSizes[n] = buff->dstCapacities[n];
1228*01826a49SYabin Cui buff->resSizes[n] = buff->srcSizes[n];
1229*01826a49SYabin Cui
1230*01826a49SYabin Cui buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]);
1231*01826a49SYabin Cui }
1232*01826a49SYabin Cui
1233*01826a49SYabin Cui buff->nbBlocks = blockNb;
1234*01826a49SYabin Cui
1235*01826a49SYabin Cui return 0;
1236*01826a49SYabin Cui }
1237*01826a49SYabin Cui
1238*01826a49SYabin Cui /* allocates buffer's arguments. returns success / failure */
createBuffers(buffers_t * buff,const char * const * const fileNamesTable,size_t nbFiles)1239*01826a49SYabin Cui static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable,
1240*01826a49SYabin Cui size_t nbFiles) {
1241*01826a49SYabin Cui size_t pos = 0;
1242*01826a49SYabin Cui size_t n;
1243*01826a49SYabin Cui size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles);
1244*01826a49SYabin Cui size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad);
1245*01826a49SYabin Cui size_t* fileSizes = calloc(sizeof(size_t), nbFiles);
1246*01826a49SYabin Cui void* srcBuffer = NULL;
1247*01826a49SYabin Cui int ret = 0;
1248*01826a49SYabin Cui
1249*01826a49SYabin Cui if(!totalSizeToLoad || !benchedSize) {
1250*01826a49SYabin Cui ret = 1;
1251*01826a49SYabin Cui DISPLAY("Nothing to Bench\n");
1252*01826a49SYabin Cui goto _cleanUp;
1253*01826a49SYabin Cui }
1254*01826a49SYabin Cui
1255*01826a49SYabin Cui srcBuffer = malloc(benchedSize);
1256*01826a49SYabin Cui
1257*01826a49SYabin Cui if(!fileSizes || !srcBuffer) {
1258*01826a49SYabin Cui ret = 1;
1259*01826a49SYabin Cui goto _cleanUp;
1260*01826a49SYabin Cui }
1261*01826a49SYabin Cui
1262*01826a49SYabin Cui for(n = 0; n < nbFiles; n++) {
1263*01826a49SYabin Cui FILE* f;
1264*01826a49SYabin Cui U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
1265*01826a49SYabin Cui if (UTIL_isDirectory(fileNamesTable[n])) {
1266*01826a49SYabin Cui DISPLAY("Ignoring %s directory... \n", fileNamesTable[n]);
1267*01826a49SYabin Cui continue;
1268*01826a49SYabin Cui }
1269*01826a49SYabin Cui if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1270*01826a49SYabin Cui DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
1271*01826a49SYabin Cui continue;
1272*01826a49SYabin Cui }
1273*01826a49SYabin Cui f = fopen(fileNamesTable[n], "rb");
1274*01826a49SYabin Cui if (f==NULL) {
1275*01826a49SYabin Cui DISPLAY("impossible to open file %s\n", fileNamesTable[n]);
1276*01826a49SYabin Cui fclose(f);
1277*01826a49SYabin Cui ret = 10;
1278*01826a49SYabin Cui goto _cleanUp;
1279*01826a49SYabin Cui }
1280*01826a49SYabin Cui
1281*01826a49SYabin Cui DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
1282*01826a49SYabin Cui
1283*01826a49SYabin Cui if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n; /* buffer too small - stop after this file */
1284*01826a49SYabin Cui {
1285*01826a49SYabin Cui char* buffer = (char*)(srcBuffer);
1286*01826a49SYabin Cui size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f);
1287*01826a49SYabin Cui fclose(f);
1288*01826a49SYabin Cui if (readSize != (size_t)fileSize) {
1289*01826a49SYabin Cui DISPLAY("could not read %s", fileNamesTable[n]);
1290*01826a49SYabin Cui ret = 1;
1291*01826a49SYabin Cui goto _cleanUp;
1292*01826a49SYabin Cui }
1293*01826a49SYabin Cui
1294*01826a49SYabin Cui fileSizes[n] = readSize;
1295*01826a49SYabin Cui pos += readSize;
1296*01826a49SYabin Cui }
1297*01826a49SYabin Cui }
1298*01826a49SYabin Cui
1299*01826a49SYabin Cui ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes);
1300*01826a49SYabin Cui
1301*01826a49SYabin Cui _cleanUp:
1302*01826a49SYabin Cui if(ret) { free(srcBuffer); }
1303*01826a49SYabin Cui free(fileSizes);
1304*01826a49SYabin Cui return ret;
1305*01826a49SYabin Cui }
1306*01826a49SYabin Cui
freeContexts(const contexts_t ctx)1307*01826a49SYabin Cui static void freeContexts(const contexts_t ctx) {
1308*01826a49SYabin Cui free(ctx.dictBuffer);
1309*01826a49SYabin Cui ZSTD_freeCCtx(ctx.cctx);
1310*01826a49SYabin Cui ZSTD_freeDCtx(ctx.dctx);
1311*01826a49SYabin Cui }
1312*01826a49SYabin Cui
/* createContexts() :
 * Creates the compression and decompression contexts of `ctx` and,
 * when `dictFileName` is not NULL, loads that file as dictionary content.
 * Dictionaries larger than 64 MB are rejected before any buffer is allocated.
 * @return : 0 on success, 1 on failure.
 *           On failure, everything created here is released
 *           and the fields of `ctx` are reset, so no dangling pointer is left behind. */
static int createContexts(contexts_t* ctx, const char* dictFileName) {
    FILE* f;
    size_t readSize;
    ctx->cctx = ZSTD_createCCtx();
    ctx->dctx = ZSTD_createDCtx();
    ctx->dictBuffer = NULL;
    ctx->dictSize = 0;
    assert(ctx->cctx != NULL);
    assert(ctx->dctx != NULL);

    if (dictFileName == NULL) {
        return 0;   /* no dictionary : dictBuffer == NULL, dictSize == 0 */
    }
    {   U64 const dictFileSize = UTIL_getFileSize(dictFileName);
        assert(dictFileSize != UTIL_FILESIZE_UNKNOWN);
        ctx->dictSize = (size_t)dictFileSize;
        assert((U64)ctx->dictSize == dictFileSize); /* check overflow */
    }

    f = fopen(dictFileName, "rb");
    if (f == NULL) {
        DISPLAY("unable to open file\n");
        goto _failure;
    }

    /* only allocate once the size is known to be acceptable */
    if (ctx->dictSize <= 64 MB) {
        ctx->dictBuffer = malloc(ctx->dictSize);
    }
    if (ctx->dictBuffer == NULL) {
        /* either above the limit, or the allocation failed */
        DISPLAY("dictionary too large\n");
        fclose(f);
        goto _failure;
    }

    readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f);
    fclose(f);
    if (readSize != ctx->dictSize) {
        DISPLAY("unable to read file\n");
        goto _failure;
    }
    return 0;

_failure:
    freeContexts(*ctx);
    ctx->cctx = NULL;
    ctx->dctx = NULL;
    ctx->dictBuffer = NULL;
    ctx->dictSize = 0;
    return 1;
}
1356*01826a49SYabin Cui
1357*01826a49SYabin Cui /*-************************************
1358*01826a49SYabin Cui * Optimizer Memoization Functions
1359*01826a49SYabin Cui **************************************/
1360*01826a49SYabin Cui
1361*01826a49SYabin Cui /* return: new length */
1362*01826a49SYabin Cui /* keep old array, will need if iter over strategy. */
1363*01826a49SYabin Cui /* prunes useless params */
sanitizeVarArray(varInds_t * varNew,const size_t varLength,const varInds_t * varArray,const ZSTD_strategy strat)1364*01826a49SYabin Cui static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) {
1365*01826a49SYabin Cui size_t i, j = 0;
1366*01826a49SYabin Cui for(i = 0; i < varLength; i++) {
1367*01826a49SYabin Cui if( !((varArray[i] == clog_ind && strat == ZSTD_fast)
1368*01826a49SYabin Cui || (varArray[i] == slog_ind && strat == ZSTD_fast)
1369*01826a49SYabin Cui || (varArray[i] == slog_ind && strat == ZSTD_dfast)
1370*01826a49SYabin Cui || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) {
1371*01826a49SYabin Cui varNew[j] = varArray[i];
1372*01826a49SYabin Cui j++;
1373*01826a49SYabin Cui }
1374*01826a49SYabin Cui }
1375*01826a49SYabin Cui return j;
1376*01826a49SYabin Cui }
1377*01826a49SYabin Cui
1378*01826a49SYabin Cui /* res should be NUM_PARAMS size */
1379*01826a49SYabin Cui /* constructs varArray from paramValues_t style parameter */
1380*01826a49SYabin Cui /* pass in using dict. */
variableParams(const paramValues_t paramConstraints,varInds_t * res,const int usingDictionary)1381*01826a49SYabin Cui static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) {
1382*01826a49SYabin Cui varInds_t i;
1383*01826a49SYabin Cui size_t j = 0;
1384*01826a49SYabin Cui for(i = 0; i < NUM_PARAMS; i++) {
1385*01826a49SYabin Cui if(paramConstraints.vals[i] == PARAM_UNSET) {
1386*01826a49SYabin Cui if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */
1387*01826a49SYabin Cui res[j] = i; j++;
1388*01826a49SYabin Cui }
1389*01826a49SYabin Cui }
1390*01826a49SYabin Cui return j;
1391*01826a49SYabin Cui }
1392*01826a49SYabin Cui
1393*01826a49SYabin Cui /* length of memo table given free variables */
memoTableLen(const varInds_t * varyParams,const size_t varyLen)1394*01826a49SYabin Cui static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) {
1395*01826a49SYabin Cui size_t arrayLen = 1;
1396*01826a49SYabin Cui size_t i;
1397*01826a49SYabin Cui for(i = 0; i < varyLen; i++) {
1398*01826a49SYabin Cui if(varyParams[i] == strt_ind) continue; /* strategy separated by table */
1399*01826a49SYabin Cui arrayLen *= rangetable[varyParams[i]];
1400*01826a49SYabin Cui }
1401*01826a49SYabin Cui return arrayLen;
1402*01826a49SYabin Cui }
1403*01826a49SYabin Cui
1404*01826a49SYabin Cui /* returns unique index in memotable of compression parameters */
memoTableIndDirect(const paramValues_t * ptr,const varInds_t * varyParams,const size_t varyLen)1405*01826a49SYabin Cui static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) {
1406*01826a49SYabin Cui size_t i;
1407*01826a49SYabin Cui unsigned ind = 0;
1408*01826a49SYabin Cui for(i = 0; i < varyLen; i++) {
1409*01826a49SYabin Cui varInds_t v = varyParams[i];
1410*01826a49SYabin Cui if(v == strt_ind) continue; /* exclude strategy from memotable */
1411*01826a49SYabin Cui ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]);
1412*01826a49SYabin Cui }
1413*01826a49SYabin Cui return ind;
1414*01826a49SYabin Cui }
1415*01826a49SYabin Cui
memoTableGet(const memoTable_t * memoTableArray,const paramValues_t p)1416*01826a49SYabin Cui static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) {
1417*01826a49SYabin Cui const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1418*01826a49SYabin Cui switch(mt.tableType) {
1419*01826a49SYabin Cui case directMap:
1420*01826a49SYabin Cui return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)];
1421*01826a49SYabin Cui case xxhashMap:
1422*01826a49SYabin Cui return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen];
1423*01826a49SYabin Cui case noMemo:
1424*01826a49SYabin Cui return 0;
1425*01826a49SYabin Cui }
1426*01826a49SYabin Cui return 0; /* should never happen, stop compiler warnings */
1427*01826a49SYabin Cui }
1428*01826a49SYabin Cui
memoTableSet(const memoTable_t * memoTableArray,const paramValues_t p,const BYTE value)1429*01826a49SYabin Cui static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) {
1430*01826a49SYabin Cui const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1431*01826a49SYabin Cui switch(mt.tableType) {
1432*01826a49SYabin Cui case directMap:
1433*01826a49SYabin Cui mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break;
1434*01826a49SYabin Cui case xxhashMap:
1435*01826a49SYabin Cui mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break;
1436*01826a49SYabin Cui case noMemo:
1437*01826a49SYabin Cui break;
1438*01826a49SYabin Cui }
1439*01826a49SYabin Cui }
1440*01826a49SYabin Cui
1441*01826a49SYabin Cui /* frees all allocated memotables */
1442*01826a49SYabin Cui /* secret contract :
1443*01826a49SYabin Cui * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */
freeMemoTableArray(memoTable_t * const mtAll)1444*01826a49SYabin Cui static void freeMemoTableArray(memoTable_t* const mtAll) {
1445*01826a49SYabin Cui int i;
1446*01826a49SYabin Cui if(mtAll == NULL) { return; }
1447*01826a49SYabin Cui for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1448*01826a49SYabin Cui free(mtAll[i].table);
1449*01826a49SYabin Cui }
1450*01826a49SYabin Cui free(mtAll);
1451*01826a49SYabin Cui }
1452*01826a49SYabin Cui
1453*01826a49SYabin Cui /* inits memotables for all (including mallocs), all strategies */
1454*01826a49SYabin Cui /* takes unsanitized varyParams */
1455*01826a49SYabin Cui static memoTable_t*
createMemoTableArray(const paramValues_t p,const varInds_t * const varyParams,const size_t varyLen,const U32 memoTableLog)1456*01826a49SYabin Cui createMemoTableArray(const paramValues_t p,
1457*01826a49SYabin Cui const varInds_t* const varyParams,
1458*01826a49SYabin Cui const size_t varyLen,
1459*01826a49SYabin Cui const U32 memoTableLog)
1460*01826a49SYabin Cui {
1461*01826a49SYabin Cui memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1));
1462*01826a49SYabin Cui ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX;
1463*01826a49SYabin Cui
1464*01826a49SYabin Cui if(mtAll == NULL) {
1465*01826a49SYabin Cui return NULL;
1466*01826a49SYabin Cui }
1467*01826a49SYabin Cui
1468*01826a49SYabin Cui for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1469*01826a49SYabin Cui mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i);
1470*01826a49SYabin Cui }
1471*01826a49SYabin Cui
1472*01826a49SYabin Cui /* no memoization */
1473*01826a49SYabin Cui if(memoTableLog == 0) {
1474*01826a49SYabin Cui for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1475*01826a49SYabin Cui mtAll[i].tableType = noMemo;
1476*01826a49SYabin Cui mtAll[i].table = NULL;
1477*01826a49SYabin Cui mtAll[i].tableLen = 0;
1478*01826a49SYabin Cui }
1479*01826a49SYabin Cui return mtAll;
1480*01826a49SYabin Cui }
1481*01826a49SYabin Cui
1482*01826a49SYabin Cui
1483*01826a49SYabin Cui if(p.vals[strt_ind] != PARAM_UNSET) {
1484*01826a49SYabin Cui stratMin = p.vals[strt_ind];
1485*01826a49SYabin Cui stratMax = p.vals[strt_ind];
1486*01826a49SYabin Cui }
1487*01826a49SYabin Cui
1488*01826a49SYabin Cui
1489*01826a49SYabin Cui for(i = stratMin; i <= stratMax; i++) {
1490*01826a49SYabin Cui size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen);
1491*01826a49SYabin Cui mtAll[i].tableType = directMap;
1492*01826a49SYabin Cui
1493*01826a49SYabin Cui if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */
1494*01826a49SYabin Cui mtAll[i].tableType = xxhashMap;
1495*01826a49SYabin Cui mtl = ((size_t)1 << memoTableLog);
1496*01826a49SYabin Cui }
1497*01826a49SYabin Cui
1498*01826a49SYabin Cui mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl);
1499*01826a49SYabin Cui mtAll[i].tableLen = mtl;
1500*01826a49SYabin Cui
1501*01826a49SYabin Cui if(mtAll[i].table == NULL) {
1502*01826a49SYabin Cui freeMemoTableArray(mtAll);
1503*01826a49SYabin Cui return NULL;
1504*01826a49SYabin Cui }
1505*01826a49SYabin Cui }
1506*01826a49SYabin Cui
1507*01826a49SYabin Cui return mtAll;
1508*01826a49SYabin Cui }
1509*01826a49SYabin Cui
1510*01826a49SYabin Cui /* Sets pc to random unmeasured set of parameters */
1511*01826a49SYabin Cui /* specify strategy */
randomConstrainedParams(paramValues_t * pc,const memoTable_t * memoTableArray,const ZSTD_strategy st)1512*01826a49SYabin Cui static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st)
1513*01826a49SYabin Cui {
1514*01826a49SYabin Cui size_t j;
1515*01826a49SYabin Cui const memoTable_t mt = memoTableArray[st];
1516*01826a49SYabin Cui pc->vals[strt_ind] = st;
1517*01826a49SYabin Cui for(j = 0; j < mt.tableLen; j++) {
1518*01826a49SYabin Cui int i;
1519*01826a49SYabin Cui for(i = 0; i < NUM_PARAMS; i++) {
1520*01826a49SYabin Cui varInds_t v = mt.varArray[i];
1521*01826a49SYabin Cui if(v == strt_ind) continue;
1522*01826a49SYabin Cui pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]);
1523*01826a49SYabin Cui }
1524*01826a49SYabin Cui
1525*01826a49SYabin Cui if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. */
1526*01826a49SYabin Cui }
1527*01826a49SYabin Cui }
1528*01826a49SYabin Cui
1529*01826a49SYabin Cui /*-************************************
1530*01826a49SYabin Cui * Benchmarking Functions
1531*01826a49SYabin Cui **************************************/
1532*01826a49SYabin Cui
/* display_params_tested() :
 * Prints, at display level 3, all NUM_PARAMS values of `cParams` on a single line,
 * which ends with a carriage return. */
static void display_params_tested(paramValues_t cParams)
{
    varInds_t vi = 0;
    DISPLAYLEVEL(3, "\r testing :");
    while (vi < NUM_PARAMS) {
        DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]);
        vi++;
    }
    DISPLAYLEVEL(3, "\b    \r");
}
1542*01826a49SYabin Cui
1543*01826a49SYabin Cui /* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */
1544*01826a49SYabin Cui /* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */
1545*01826a49SYabin Cui /* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */
1546*01826a49SYabin Cui /* nbSeconds used in same way as in BMK_advancedParams_t */
1547*01826a49SYabin Cui /* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */
1548*01826a49SYabin Cui /* dictionary nullable, nothing else though. */
1549*01826a49SYabin Cui /* note : it would be a lot better if this function was present in benchzstd.c,
1550*01826a49SYabin Cui * sharing code with benchMemAdvanced(), since it's technically a part of it */
1551*01826a49SYabin Cui static BMK_benchOutcome_t
BMK_benchMemInvertible(buffers_t buf,contexts_t ctx,int cLevel,const paramValues_t * comprParams,BMK_mode_t mode,unsigned nbSeconds)1552*01826a49SYabin Cui BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
1553*01826a49SYabin Cui int cLevel, const paramValues_t* comprParams,
1554*01826a49SYabin Cui BMK_mode_t mode, unsigned nbSeconds)
1555*01826a49SYabin Cui {
1556*01826a49SYabin Cui U32 i;
1557*01826a49SYabin Cui BMK_benchResult_t bResult;
1558*01826a49SYabin Cui const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs;
1559*01826a49SYabin Cui size_t const *const srcSizes = buf.srcSizes;
1560*01826a49SYabin Cui void** const dstPtrs = buf.dstPtrs;
1561*01826a49SYabin Cui size_t const *const dstCapacities = buf.dstCapacities;
1562*01826a49SYabin Cui size_t* const dstSizes = buf.dstSizes;
1563*01826a49SYabin Cui void** const resPtrs = buf.resPtrs;
1564*01826a49SYabin Cui size_t const *const resSizes = buf.resSizes;
1565*01826a49SYabin Cui const void* dictBuffer = ctx.dictBuffer;
1566*01826a49SYabin Cui const size_t dictBufferSize = ctx.dictSize;
1567*01826a49SYabin Cui const size_t nbBlocks = buf.nbBlocks;
1568*01826a49SYabin Cui const size_t srcSize = buf.srcSize;
1569*01826a49SYabin Cui ZSTD_CCtx* cctx = ctx.cctx;
1570*01826a49SYabin Cui ZSTD_DCtx* dctx = ctx.dctx;
1571*01826a49SYabin Cui
1572*01826a49SYabin Cui /* init */
1573*01826a49SYabin Cui display_params_tested(*comprParams);
1574*01826a49SYabin Cui memset(&bResult, 0, sizeof(bResult));
1575*01826a49SYabin Cui
1576*01826a49SYabin Cui /* warming up memory */
1577*01826a49SYabin Cui for (i = 0; i < buf.nbBlocks; i++) {
1578*01826a49SYabin Cui if (mode != BMK_decodeOnly) {
1579*01826a49SYabin Cui RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1);
1580*01826a49SYabin Cui } else {
1581*01826a49SYabin Cui RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1);
1582*01826a49SYabin Cui }
1583*01826a49SYabin Cui }
1584*01826a49SYabin Cui
1585*01826a49SYabin Cui /* Bench */
1586*01826a49SYabin Cui {
1587*01826a49SYabin Cui /* init args */
1588*01826a49SYabin Cui int compressionCompleted = (mode == BMK_decodeOnly);
1589*01826a49SYabin Cui int decompressionCompleted = (mode == BMK_compressOnly);
1590*01826a49SYabin Cui BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1591*01826a49SYabin Cui BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1592*01826a49SYabin Cui BMK_benchParams_t cbp, dbp;
1593*01826a49SYabin Cui BMK_initCCtxArgs cctxprep;
1594*01826a49SYabin Cui BMK_initDCtxArgs dctxprep;
1595*01826a49SYabin Cui
1596*01826a49SYabin Cui cbp.benchFn = local_defaultCompress;
1597*01826a49SYabin Cui cbp.benchPayload = cctx;
1598*01826a49SYabin Cui cbp.initFn = local_initCCtx;
1599*01826a49SYabin Cui cbp.initPayload = &cctxprep;
1600*01826a49SYabin Cui cbp.errorFn = ZSTD_isError;
1601*01826a49SYabin Cui cbp.blockCount = nbBlocks;
1602*01826a49SYabin Cui cbp.srcBuffers = srcPtrs;
1603*01826a49SYabin Cui cbp.srcSizes = srcSizes;
1604*01826a49SYabin Cui cbp.dstBuffers = dstPtrs;
1605*01826a49SYabin Cui cbp.dstCapacities = dstCapacities;
1606*01826a49SYabin Cui cbp.blockResults = dstSizes;
1607*01826a49SYabin Cui
1608*01826a49SYabin Cui cctxprep.cctx = cctx;
1609*01826a49SYabin Cui cctxprep.dictBuffer = dictBuffer;
1610*01826a49SYabin Cui cctxprep.dictBufferSize = dictBufferSize;
1611*01826a49SYabin Cui cctxprep.cLevel = cLevel;
1612*01826a49SYabin Cui cctxprep.comprParams = comprParams;
1613*01826a49SYabin Cui
1614*01826a49SYabin Cui dbp.benchFn = local_defaultDecompress;
1615*01826a49SYabin Cui dbp.benchPayload = dctx;
1616*01826a49SYabin Cui dbp.initFn = local_initDCtx;
1617*01826a49SYabin Cui dbp.initPayload = &dctxprep;
1618*01826a49SYabin Cui dbp.errorFn = ZSTD_isError;
1619*01826a49SYabin Cui dbp.blockCount = nbBlocks;
1620*01826a49SYabin Cui dbp.srcBuffers = (const void* const *) dstPtrs;
1621*01826a49SYabin Cui dbp.srcSizes = dstCapacities;
1622*01826a49SYabin Cui dbp.dstBuffers = resPtrs;
1623*01826a49SYabin Cui dbp.dstCapacities = resSizes;
1624*01826a49SYabin Cui dbp.blockResults = NULL;
1625*01826a49SYabin Cui
1626*01826a49SYabin Cui dctxprep.dctx = dctx;
1627*01826a49SYabin Cui dctxprep.dictBuffer = dictBuffer;
1628*01826a49SYabin Cui dctxprep.dictBufferSize = dictBufferSize;
1629*01826a49SYabin Cui
1630*01826a49SYabin Cui assert(timeStateCompress != NULL);
1631*01826a49SYabin Cui assert(timeStateDecompress != NULL);
1632*01826a49SYabin Cui while(!compressionCompleted) {
1633*01826a49SYabin Cui BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp);
1634*01826a49SYabin Cui
1635*01826a49SYabin Cui if (!BMK_isSuccessful_runOutcome(cOutcome)) {
1636*01826a49SYabin Cui BMK_benchOutcome_t bOut;
1637*01826a49SYabin Cui memset(&bOut, 0, sizeof(bOut));
1638*01826a49SYabin Cui bOut.tag = 1; /* should rather be a function or a constant */
1639*01826a49SYabin Cui BMK_freeTimedFnState(timeStateCompress);
1640*01826a49SYabin Cui BMK_freeTimedFnState(timeStateDecompress);
1641*01826a49SYabin Cui return bOut;
1642*01826a49SYabin Cui }
1643*01826a49SYabin Cui { BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome);
1644*01826a49SYabin Cui bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1645*01826a49SYabin Cui bResult.cSize = rResult.sumOfReturn;
1646*01826a49SYabin Cui }
1647*01826a49SYabin Cui compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
1648*01826a49SYabin Cui }
1649*01826a49SYabin Cui
1650*01826a49SYabin Cui while (!decompressionCompleted) {
1651*01826a49SYabin Cui BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
1652*01826a49SYabin Cui
1653*01826a49SYabin Cui if (!BMK_isSuccessful_runOutcome(dOutcome)) {
1654*01826a49SYabin Cui BMK_benchOutcome_t bOut;
1655*01826a49SYabin Cui memset(&bOut, 0, sizeof(bOut));
1656*01826a49SYabin Cui bOut.tag = 1; /* should rather be a function or a constant */
1657*01826a49SYabin Cui BMK_freeTimedFnState(timeStateCompress);
1658*01826a49SYabin Cui BMK_freeTimedFnState(timeStateDecompress);
1659*01826a49SYabin Cui return bOut;
1660*01826a49SYabin Cui }
1661*01826a49SYabin Cui { BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome);
1662*01826a49SYabin Cui bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1663*01826a49SYabin Cui }
1664*01826a49SYabin Cui decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
1665*01826a49SYabin Cui }
1666*01826a49SYabin Cui
1667*01826a49SYabin Cui BMK_freeTimedFnState(timeStateCompress);
1668*01826a49SYabin Cui BMK_freeTimedFnState(timeStateDecompress);
1669*01826a49SYabin Cui }
1670*01826a49SYabin Cui
1671*01826a49SYabin Cui /* Bench */
1672*01826a49SYabin Cui bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);
1673*01826a49SYabin Cui
1674*01826a49SYabin Cui { BMK_benchOutcome_t bOut;
1675*01826a49SYabin Cui bOut.tag = 0;
1676*01826a49SYabin Cui bOut.internal_never_use_directly = bResult; /* should be a function */
1677*01826a49SYabin Cui return bOut;
1678*01826a49SYabin Cui }
1679*01826a49SYabin Cui }
1680*01826a49SYabin Cui
1681*01826a49SYabin Cui /* BMK_benchParam() :
1682*01826a49SYabin Cui * benchmark a set of `cParams` over sample `buf`,
1683*01826a49SYabin Cui * store the result in `resultPtr`.
1684*01826a49SYabin Cui * @return : 0 if success, 1 if error */
BMK_benchParam(BMK_benchResult_t * resultPtr,buffers_t buf,contexts_t ctx,paramValues_t cParams)1685*01826a49SYabin Cui static int BMK_benchParam ( BMK_benchResult_t* resultPtr,
1686*01826a49SYabin Cui buffers_t buf, contexts_t ctx,
1687*01826a49SYabin Cui paramValues_t cParams)
1688*01826a49SYabin Cui {
1689*01826a49SYabin Cui BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx,
1690*01826a49SYabin Cui BASE_CLEVEL, &cParams,
1691*01826a49SYabin Cui BMK_both, 3);
1692*01826a49SYabin Cui if (!BMK_isSuccessful_benchOutcome(outcome)) return 1;
1693*01826a49SYabin Cui *resultPtr = BMK_extract_benchResult(outcome);
1694*01826a49SYabin Cui return 0;
1695*01826a49SYabin Cui }
1696*01826a49SYabin Cui
1697*01826a49SYabin Cui
1698*01826a49SYabin Cui /* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */
1699*01826a49SYabin Cui #define VARIANCE 1.2
allBench(BMK_benchResult_t * resultPtr,const buffers_t buf,const contexts_t ctx,const paramValues_t cParams,const constraint_t target,BMK_benchResult_t * winnerResult,int feas)1700*01826a49SYabin Cui static int allBench(BMK_benchResult_t* resultPtr,
1701*01826a49SYabin Cui const buffers_t buf, const contexts_t ctx,
1702*01826a49SYabin Cui const paramValues_t cParams,
1703*01826a49SYabin Cui const constraint_t target,
1704*01826a49SYabin Cui BMK_benchResult_t* winnerResult, int feas)
1705*01826a49SYabin Cui {
1706*01826a49SYabin Cui BMK_benchResult_t benchres;
1707*01826a49SYabin Cui double uncertaintyConstantC = 3., uncertaintyConstantD = 3.;
1708*01826a49SYabin Cui double winnerRS;
1709*01826a49SYabin Cui
1710*01826a49SYabin Cui BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2);
1711*01826a49SYabin Cui if (!BMK_isSuccessful_benchOutcome(outcome)) {
1712*01826a49SYabin Cui DEBUGOUTPUT("Benchmarking failed \n");
1713*01826a49SYabin Cui return ERROR_RESULT;
1714*01826a49SYabin Cui }
1715*01826a49SYabin Cui benchres = BMK_extract_benchResult(outcome);
1716*01826a49SYabin Cui
1717*01826a49SYabin Cui winnerRS = resultScore(*winnerResult, buf.srcSize, target);
1718*01826a49SYabin Cui DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS);
1719*01826a49SYabin Cui
1720*01826a49SYabin Cui *resultPtr = benchres;
1721*01826a49SYabin Cui
1722*01826a49SYabin Cui /* anything with worse ratio in feas is definitely worse, discard */
1723*01826a49SYabin Cui if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) {
1724*01826a49SYabin Cui return WORSE_RESULT;
1725*01826a49SYabin Cui }
1726*01826a49SYabin Cui
1727*01826a49SYabin Cui /* calculate uncertainty in compression / decompression runs */
1728*01826a49SYabin Cui if (benchres.cSpeed) {
1729*01826a49SYabin Cui double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
1730*01826a49SYabin Cui uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC);
1731*01826a49SYabin Cui }
1732*01826a49SYabin Cui
1733*01826a49SYabin Cui if (benchres.dSpeed) {
1734*01826a49SYabin Cui double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
1735*01826a49SYabin Cui uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD);
1736*01826a49SYabin Cui }
1737*01826a49SYabin Cui
1738*01826a49SYabin Cui /* optimistic assumption of benchres */
1739*01826a49SYabin Cui { BMK_benchResult_t resultMax = benchres;
1740*01826a49SYabin Cui resultMax.cSpeed = (unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
1741*01826a49SYabin Cui resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
1742*01826a49SYabin Cui
1743*01826a49SYabin Cui /* disregard infeasible results in feas mode */
1744*01826a49SYabin Cui /* disregard if resultMax < winner in infeas mode */
1745*01826a49SYabin Cui if((feas && !feasible(resultMax, target)) ||
1746*01826a49SYabin Cui (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) {
1747*01826a49SYabin Cui return WORSE_RESULT;
1748*01826a49SYabin Cui }
1749*01826a49SYabin Cui }
1750*01826a49SYabin Cui
1751*01826a49SYabin Cui /* compare by resultScore when in infeas */
1752*01826a49SYabin Cui /* compare by compareResultLT when in feas */
1753*01826a49SYabin Cui if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) ||
1754*01826a49SYabin Cui (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) ) {
1755*01826a49SYabin Cui return BETTER_RESULT;
1756*01826a49SYabin Cui } else {
1757*01826a49SYabin Cui return WORSE_RESULT;
1758*01826a49SYabin Cui }
1759*01826a49SYabin Cui }
1760*01826a49SYabin Cui
1761*01826a49SYabin Cui
1762*01826a49SYabin Cui #define INFEASIBLE_THRESHOLD 200
1763*01826a49SYabin Cui /* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */
benchMemo(BMK_benchResult_t * resultPtr,const buffers_t buf,const contexts_t ctx,const paramValues_t cParams,const constraint_t target,BMK_benchResult_t * winnerResult,memoTable_t * const memoTableArray,const int feas)1764*01826a49SYabin Cui static int benchMemo(BMK_benchResult_t* resultPtr,
1765*01826a49SYabin Cui const buffers_t buf, const contexts_t ctx,
1766*01826a49SYabin Cui const paramValues_t cParams,
1767*01826a49SYabin Cui const constraint_t target,
1768*01826a49SYabin Cui BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray,
1769*01826a49SYabin Cui const int feas) {
1770*01826a49SYabin Cui static int bmcount = 0;
1771*01826a49SYabin Cui int res;
1772*01826a49SYabin Cui
1773*01826a49SYabin Cui if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD
1774*01826a49SYabin Cui || redundantParams(cParams, target, buf.maxBlockSize) ) {
1775*01826a49SYabin Cui return WORSE_RESULT;
1776*01826a49SYabin Cui }
1777*01826a49SYabin Cui
1778*01826a49SYabin Cui res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas);
1779*01826a49SYabin Cui
1780*01826a49SYabin Cui if(DEBUG && !(bmcount % 250)) {
1781*01826a49SYabin Cui DISPLAY("Count: %d\n", bmcount);
1782*01826a49SYabin Cui bmcount++;
1783*01826a49SYabin Cui }
1784*01826a49SYabin Cui BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize);
1785*01826a49SYabin Cui
1786*01826a49SYabin Cui if(res == BETTER_RESULT || feas) {
1787*01826a49SYabin Cui memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent */
1788*01826a49SYabin Cui }
1789*01826a49SYabin Cui return res;
1790*01826a49SYabin Cui }
1791*01826a49SYabin Cui
1792*01826a49SYabin Cui
1793*01826a49SYabin Cui typedef struct {
1794*01826a49SYabin Cui U64 cSpeed_min;
1795*01826a49SYabin Cui U64 dSpeed_min;
1796*01826a49SYabin Cui U32 windowLog_max;
1797*01826a49SYabin Cui ZSTD_strategy strategy_max;
1798*01826a49SYabin Cui } level_constraints_t;
1799*01826a49SYabin Cui
1800*01826a49SYabin Cui static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1];
1801*01826a49SYabin Cui
BMK_init_level_constraints(int bytePerSec_level1)1802*01826a49SYabin Cui static void BMK_init_level_constraints(int bytePerSec_level1)
1803*01826a49SYabin Cui {
1804*01826a49SYabin Cui assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel());
1805*01826a49SYabin Cui memset(g_level_constraint, 0, sizeof(g_level_constraint));
1806*01826a49SYabin Cui g_level_constraint[1].cSpeed_min = bytePerSec_level1;
1807*01826a49SYabin Cui g_level_constraint[1].dSpeed_min = 0;
1808*01826a49SYabin Cui g_level_constraint[1].windowLog_max = 19;
1809*01826a49SYabin Cui g_level_constraint[1].strategy_max = ZSTD_fast;
1810*01826a49SYabin Cui
1811*01826a49SYabin Cui /* establish speed objectives (relative to level 1) */
1812*01826a49SYabin Cui { int l;
1813*01826a49SYabin Cui for (l=2; l<=NB_LEVELS_TRACKED; l++) {
1814*01826a49SYabin Cui g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64;
1815*01826a49SYabin Cui g_level_constraint[l].dSpeed_min = 0;
1816*01826a49SYabin Cui g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */
1817*01826a49SYabin Cui g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX;
1818*01826a49SYabin Cui } }
1819*01826a49SYabin Cui }
1820*01826a49SYabin Cui
/* BMK_seed() :
 * Benchmarks `params` once, then offers the result to every tracked level
 * (1..NB_LEVELS_TRACKED).
 * A level is skipped when the result violates g_level_constraint[level]
 * (compression speed, decompression speed, windowLog, strategy).
 * An empty slot (winners[level].result.cSize == 0) takes the result directly.
 * Otherwise the result replaces the current winner only if its compressed size
 * is within (1 + 0.02/level) of the winner's and none of its four "notes"
 * (decompression memory, compression memory, compression speed,
 * decompression speed) is lower than the winner's.
 * Each accepted entry is printed with BMK_print_cLevelEntry().
 * @return : 1 if at least one entry of `winners` was updated, 0 otherwise
 *           (including when the benchmark itself failed).
 */
static int BMK_seed(winnerInfo_t* winners,
                    const paramValues_t params,
                    const buffers_t buf,
                    const contexts_t ctx)
{
    BMK_benchResult_t testResult;
    int better = 0;
    int cLevel;

    /* BMK_benchParam() signals failure with a non-zero value (benchOnce() relies on it);
     * testResult cannot be trusted in that case, so the candidate is dropped */
    if (BMK_benchParam(&testResult, buf, ctx, params)) {
        DISPLAYLEVEL(3, "Error during benchmarking : parameters ignored \n");
        return 0;
    }

    for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) {

        if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min)
            continue;   /* not fast enough for this level */
        if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min)
            continue;   /* not fast enough for this level */
        if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max)
            continue;   /* too much memory for this level */
        if (params.vals[strt_ind] > (U32)g_level_constraint[cLevel].strategy_max)
            continue;   /* forbidden strategy for this level */
        if (winners[cLevel].result.cSize==0) {
            /* first solution for this cLevel */
            winners[cLevel].result = testResult;
            winners[cLevel].params = params;
            BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
            better = 1;
            continue;
        }

        if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
            /* Validate solution is "good enough" */
            /* W_* : candidate under test ; O_* : current winner of this level */
            double W_ratio = (double)buf.srcSize / (double)testResult.cSize;
            double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize;
            double W_ratioNote = log (W_ratio);
            double O_ratioNote = log (O_ratio);
            /* shifts are done in size_t, like the compression-memory figures below */
            size_t W_DMemUsed = ((size_t)1 << params.vals[wlog_ind]) + (16 KB);
            size_t O_DMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB);
            double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
            double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);

            size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params));
            size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params));
            double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
            double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);

            double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed);
            double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed);

            double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed);
            double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);

            if (W_DMemUsed_note < O_DMemUsed_note) {
                /* uses too much Decompression memory for too little benefit */
                if (W_ratio > O_ratio)
                    DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n",
                                 W_ratio, (double)(W_DMemUsed) / 1024 / 1024,
                                 O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel);
                continue;
            }
            if (W_CMemUsed_note < O_CMemUsed_note) {
                /* uses too much memory for compression for too little benefit */
                if (W_ratio > O_ratio)
                    DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n",
                                 W_ratio, (double)(W_CMemUsed) / 1024 / 1024,
                                 O_ratio, (double)(O_CMemUsed) / 1024 / 1024,
                                 cLevel);
                continue;
            }
            if (W_CSpeed_note < O_CSpeed_note ) {
                /* too large compression speed difference for the compression benefit */
                if (W_ratio > O_ratio)
                    DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
                                 W_ratio, (double)testResult.cSpeed / MB_UNIT,
                                 O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT,
                                 cLevel);
                continue;
            }
            if (W_DSpeed_note < O_DSpeed_note ) {
                /* too large decompression speed difference for the compression benefit */
                if (W_ratio > O_ratio)
                    DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
                                 W_ratio, (double)testResult.dSpeed / MB_UNIT,
                                 O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT,
                                 cLevel);
                continue;
            }

            if (W_ratio < O_ratio)
                DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n",
                             W_ratio, O_ratio, cLevel);

            winners[cLevel].result = testResult;
            winners[cLevel].params = params;
            BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);

            better = 1;
    }   }

    return better;
}
1922*01826a49SYabin Cui
1923*01826a49SYabin Cui /*-************************************
1924*01826a49SYabin Cui * Compression Level Table Generation Functions
1925*01826a49SYabin Cui **************************************/
1926*01826a49SYabin Cui
1927*01826a49SYabin Cui #define PARAMTABLELOG 25
1928*01826a49SYabin Cui #define PARAMTABLESIZE (1<<PARAMTABLELOG)
1929*01826a49SYabin Cui #define PARAMTABLEMASK (PARAMTABLESIZE-1)
1930*01826a49SYabin Cui static BYTE g_alreadyTested[PARAMTABLESIZE] = {0}; /* init to zero */
1931*01826a49SYabin Cui
NB_TESTS_PLAYED(paramValues_t p)1932*01826a49SYabin Cui static BYTE* NB_TESTS_PLAYED(paramValues_t p)
1933*01826a49SYabin Cui {
1934*01826a49SYabin Cui ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p));
1935*01826a49SYabin Cui unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0);
1936*01826a49SYabin Cui return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK];
1937*01826a49SYabin Cui }
1938*01826a49SYabin Cui
/* playAround() :
 * Random walk starting from `p`.
 * Each round applies 4 random single-step parameter changes (repeated until
 * the result passes paramValid()), then seeds the winners table with it.
 * A candidate whose play counter is N is only tested when the low N bits of a
 * random draw are all zero, so frequently played slots get tested less often.
 * Whenever BMK_seed() reports an improvement, the table is saved and the
 * search recurses from the improved parameters.
 * The walk stops after g_maxVariationTime (measured with UTIL_clockSpanMicro)
 * or once the round counter exceeds g_maxNbVariations.
 */
static void playAround(FILE* f,
                       winnerInfo_t* winners,
                       paramValues_t p,
                       const buffers_t buf, const contexts_t ctx)
{
    UTIL_time_t const startTime = UTIL_getTime();
    int nbRounds = 0;

    while (UTIL_clockSpanMicro(startTime) < g_maxVariationTime) {
        if (nbRounds++ > g_maxNbVariations) break;

        /* mutate : 4 random +1/-1 steps on random parameters */
        do {
            int step;
            for (step = 0; step < 4; step++) {
                paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1),
                              ((FUZ_rand(&g_rand) & 1) << 1) - 1,
                              &p);
            }
        } while (!paramValid(p));

        /* exclude faster if already played params */
        if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1))
            continue;

        /* test : record one more play for this slot */
        (*NB_TESTS_PLAYED(p))++;

        if (BMK_seed(winners, p, buf, ctx)) {
            /* improvement found => search more */
            BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
            playAround(f, winners, p, buf, ctx);
        }
    }
}
1975*01826a49SYabin Cui
1976*01826a49SYabin Cui static void
BMK_selectRandomStart(FILE * f,winnerInfo_t * winners,const buffers_t buf,const contexts_t ctx)1977*01826a49SYabin Cui BMK_selectRandomStart( FILE* f,
1978*01826a49SYabin Cui winnerInfo_t* winners,
1979*01826a49SYabin Cui const buffers_t buf, const contexts_t ctx)
1980*01826a49SYabin Cui {
1981*01826a49SYabin Cui U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1);
1982*01826a49SYabin Cui if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) {
1983*01826a49SYabin Cui /* use some random entry */
1984*01826a49SYabin Cui paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */
1985*01826a49SYabin Cui buf.srcSize, 0);
1986*01826a49SYabin Cui playAround(f, winners, p, buf, ctx);
1987*01826a49SYabin Cui } else {
1988*01826a49SYabin Cui playAround(f, winners, winners[id].params, buf, ctx);
1989*01826a49SYabin Cui }
1990*01826a49SYabin Cui }
1991*01826a49SYabin Cui
1992*01826a49SYabin Cui
1993*01826a49SYabin Cui /* BMK_generate_cLevelTable() :
1994*01826a49SYabin Cui * test a large number of configurations
1995*01826a49SYabin Cui * and distribute them across compression levels according to speed conditions.
1996*01826a49SYabin Cui * display and save all intermediate results into rfName = "grillResults.txt".
1997*01826a49SYabin Cui * the function automatically stops after g_timeLimit_s.
1998*01826a49SYabin Cui * this function cannot error, it directly exit() in case of problem.
1999*01826a49SYabin Cui */
BMK_generate_cLevelTable(const buffers_t buf,const contexts_t ctx)2000*01826a49SYabin Cui static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx)
2001*01826a49SYabin Cui {
2002*01826a49SYabin Cui paramValues_t params;
2003*01826a49SYabin Cui winnerInfo_t winners[NB_LEVELS_TRACKED+1];
2004*01826a49SYabin Cui const char* const rfName = "grillResults.txt";
2005*01826a49SYabin Cui FILE* const f = fopen(rfName, "w");
2006*01826a49SYabin Cui
2007*01826a49SYabin Cui /* init */
2008*01826a49SYabin Cui assert(g_singleRun==0);
2009*01826a49SYabin Cui memset(winners, 0, sizeof(winners));
2010*01826a49SYabin Cui if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
2011*01826a49SYabin Cui
2012*01826a49SYabin Cui if (g_target) {
2013*01826a49SYabin Cui BMK_init_level_constraints(g_target * MB_UNIT);
2014*01826a49SYabin Cui } else {
2015*01826a49SYabin Cui /* baseline config for level 1 */
2016*01826a49SYabin Cui paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize));
2017*01826a49SYabin Cui BMK_benchResult_t testResult;
2018*01826a49SYabin Cui BMK_benchParam(&testResult, buf, ctx, l1params);
2019*01826a49SYabin Cui BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32));
2020*01826a49SYabin Cui }
2021*01826a49SYabin Cui
2022*01826a49SYabin Cui /* populate initial solution */
2023*01826a49SYabin Cui { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2024*01826a49SYabin Cui int i;
2025*01826a49SYabin Cui for (i=0; i<=maxSeeds; i++) {
2026*01826a49SYabin Cui params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0));
2027*01826a49SYabin Cui BMK_seed(winners, params, buf, ctx);
2028*01826a49SYabin Cui } }
2029*01826a49SYabin Cui BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2030*01826a49SYabin Cui
2031*01826a49SYabin Cui /* start tests */
2032*01826a49SYabin Cui { const UTIL_time_t grillStart = UTIL_getTime();
2033*01826a49SYabin Cui do {
2034*01826a49SYabin Cui BMK_selectRandomStart(f, winners, buf, ctx);
2035*01826a49SYabin Cui } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s);
2036*01826a49SYabin Cui }
2037*01826a49SYabin Cui
2038*01826a49SYabin Cui /* end summary */
2039*01826a49SYabin Cui BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2040*01826a49SYabin Cui DISPLAY("grillParams operations completed \n");
2041*01826a49SYabin Cui
2042*01826a49SYabin Cui /* clean up*/
2043*01826a49SYabin Cui fclose(f);
2044*01826a49SYabin Cui }
2045*01826a49SYabin Cui
2046*01826a49SYabin Cui
2047*01826a49SYabin Cui /*-************************************
2048*01826a49SYabin Cui * Single Benchmark Functions
2049*01826a49SYabin Cui **************************************/
2050*01826a49SYabin Cui
2051*01826a49SYabin Cui static int
benchOnce(const buffers_t buf,const contexts_t ctx,const int cLevel)2052*01826a49SYabin Cui benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel)
2053*01826a49SYabin Cui {
2054*01826a49SYabin Cui BMK_benchResult_t testResult;
2055*01826a49SYabin Cui g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2056*01826a49SYabin Cui
2057*01826a49SYabin Cui if (BMK_benchParam(&testResult, buf, ctx, g_params)) {
2058*01826a49SYabin Cui DISPLAY("Error during benchmarking\n");
2059*01826a49SYabin Cui return 1;
2060*01826a49SYabin Cui }
2061*01826a49SYabin Cui
2062*01826a49SYabin Cui BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize);
2063*01826a49SYabin Cui
2064*01826a49SYabin Cui return 0;
2065*01826a49SYabin Cui }
2066*01826a49SYabin Cui
benchSample(double compressibility,int cLevel)2067*01826a49SYabin Cui static int benchSample(double compressibility, int cLevel)
2068*01826a49SYabin Cui {
2069*01826a49SYabin Cui const char* const name = "Sample 10MB";
2070*01826a49SYabin Cui size_t const benchedSize = 10 MB;
2071*01826a49SYabin Cui void* const srcBuffer = malloc(benchedSize);
2072*01826a49SYabin Cui int ret = 0;
2073*01826a49SYabin Cui
2074*01826a49SYabin Cui buffers_t buf;
2075*01826a49SYabin Cui contexts_t ctx;
2076*01826a49SYabin Cui
2077*01826a49SYabin Cui if(srcBuffer == NULL) {
2078*01826a49SYabin Cui DISPLAY("Out of Memory\n");
2079*01826a49SYabin Cui return 2;
2080*01826a49SYabin Cui }
2081*01826a49SYabin Cui
2082*01826a49SYabin Cui RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
2083*01826a49SYabin Cui
2084*01826a49SYabin Cui if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) {
2085*01826a49SYabin Cui DISPLAY("Buffer Creation Error\n");
2086*01826a49SYabin Cui free(srcBuffer);
2087*01826a49SYabin Cui return 3;
2088*01826a49SYabin Cui }
2089*01826a49SYabin Cui
2090*01826a49SYabin Cui if(createContexts(&ctx, NULL)) {
2091*01826a49SYabin Cui DISPLAY("Context Creation Error\n");
2092*01826a49SYabin Cui freeBuffers(buf);
2093*01826a49SYabin Cui return 1;
2094*01826a49SYabin Cui }
2095*01826a49SYabin Cui
2096*01826a49SYabin Cui /* bench */
2097*01826a49SYabin Cui DISPLAY("\r%79s\r", "");
2098*01826a49SYabin Cui DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100));
2099*01826a49SYabin Cui
2100*01826a49SYabin Cui if(g_singleRun) {
2101*01826a49SYabin Cui ret = benchOnce(buf, ctx, cLevel);
2102*01826a49SYabin Cui } else {
2103*01826a49SYabin Cui BMK_generate_cLevelTable(buf, ctx);
2104*01826a49SYabin Cui }
2105*01826a49SYabin Cui
2106*01826a49SYabin Cui freeBuffers(buf);
2107*01826a49SYabin Cui freeContexts(ctx);
2108*01826a49SYabin Cui
2109*01826a49SYabin Cui return ret;
2110*01826a49SYabin Cui }
2111*01826a49SYabin Cui
2112*01826a49SYabin Cui /* benchFiles() :
2113*01826a49SYabin Cui * note: while this function takes a table of filenames,
2114*01826a49SYabin Cui * in practice, only the first filename will be used */
benchFiles(const char ** fileNamesTable,int nbFiles,const char * dictFileName,int cLevel)2115*01826a49SYabin Cui static int benchFiles(const char** fileNamesTable, int nbFiles,
2116*01826a49SYabin Cui const char* dictFileName, int cLevel)
2117*01826a49SYabin Cui {
2118*01826a49SYabin Cui buffers_t buf;
2119*01826a49SYabin Cui contexts_t ctx;
2120*01826a49SYabin Cui int ret = 0;
2121*01826a49SYabin Cui
2122*01826a49SYabin Cui if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2123*01826a49SYabin Cui DISPLAY("unable to load files\n");
2124*01826a49SYabin Cui return 1;
2125*01826a49SYabin Cui }
2126*01826a49SYabin Cui
2127*01826a49SYabin Cui if (createContexts(&ctx, dictFileName)) {
2128*01826a49SYabin Cui DISPLAY("unable to load dictionary\n");
2129*01826a49SYabin Cui freeBuffers(buf);
2130*01826a49SYabin Cui return 2;
2131*01826a49SYabin Cui }
2132*01826a49SYabin Cui
2133*01826a49SYabin Cui DISPLAY("\r%79s\r", "");
2134*01826a49SYabin Cui if (nbFiles == 1) {
2135*01826a49SYabin Cui DISPLAY("using %s : \n", fileNamesTable[0]);
2136*01826a49SYabin Cui } else {
2137*01826a49SYabin Cui DISPLAY("using %d Files : \n", nbFiles);
2138*01826a49SYabin Cui }
2139*01826a49SYabin Cui
2140*01826a49SYabin Cui if (g_singleRun) {
2141*01826a49SYabin Cui ret = benchOnce(buf, ctx, cLevel);
2142*01826a49SYabin Cui } else {
2143*01826a49SYabin Cui BMK_generate_cLevelTable(buf, ctx);
2144*01826a49SYabin Cui }
2145*01826a49SYabin Cui
2146*01826a49SYabin Cui freeBuffers(buf);
2147*01826a49SYabin Cui freeContexts(ctx);
2148*01826a49SYabin Cui return ret;
2149*01826a49SYabin Cui }
2150*01826a49SYabin Cui
2151*01826a49SYabin Cui
2152*01826a49SYabin Cui /*-************************************
2153*01826a49SYabin Cui * Local Optimization Functions
2154*01826a49SYabin Cui **************************************/
2155*01826a49SYabin Cui
2156*01826a49SYabin Cui /* One iteration of hill climbing. Specifically, it first tries all
2157*01826a49SYabin Cui * valid parameter configurations w/ manhattan distance 1 and picks the best one
2158*01826a49SYabin Cui * failing that, it progressively tries candidates further and further away (up to #dim + 2)
2159*01826a49SYabin Cui * if it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the
2160*01826a49SYabin Cui * current stage of hill climbing.
2161*01826a49SYabin Cui * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to
2162*01826a49SYabin Cui * the resultScore function, which is effectively a linear increase in reward until it reaches
2163*01826a49SYabin Cui * the constraint-satisfying value, it which point any excess results in only logarithmic reward.
2164*01826a49SYabin Cui * This aims to find some constraint-satisfying point.
2165*01826a49SYabin Cui * Phase 2 optimizes in accordance with what the original function sets out to maximize, with
2166*01826a49SYabin Cui * all feasible solutions valued over all infeasible solutions.
2167*01826a49SYabin Cui */
2168*01826a49SYabin Cui
2169*01826a49SYabin Cui /* sanitize all params here.
2170*01826a49SYabin Cui * all generation after random should be sanitized. (maybe sanitize random)
2171*01826a49SYabin Cui */
climbOnce(const constraint_t target,memoTable_t * mtAll,const buffers_t buf,const contexts_t ctx,const paramValues_t init)2172*01826a49SYabin Cui static winnerInfo_t climbOnce(const constraint_t target,
2173*01826a49SYabin Cui memoTable_t* mtAll,
2174*01826a49SYabin Cui const buffers_t buf, const contexts_t ctx,
2175*01826a49SYabin Cui const paramValues_t init)
2176*01826a49SYabin Cui {
2177*01826a49SYabin Cui /*
2178*01826a49SYabin Cui * cparam - currently considered 'center'
2179*01826a49SYabin Cui * candidate - params to benchmark/results
2180*01826a49SYabin Cui * winner - best option found so far.
2181*01826a49SYabin Cui */
2182*01826a49SYabin Cui paramValues_t cparam = init;
2183*01826a49SYabin Cui winnerInfo_t candidateInfo, winnerInfo;
2184*01826a49SYabin Cui int better = 1;
2185*01826a49SYabin Cui int feas = 0;
2186*01826a49SYabin Cui
2187*01826a49SYabin Cui winnerInfo = initWinnerInfo(init);
2188*01826a49SYabin Cui candidateInfo = winnerInfo;
2189*01826a49SYabin Cui
2190*01826a49SYabin Cui { winnerInfo_t bestFeasible1 = initWinnerInfo(cparam);
2191*01826a49SYabin Cui DEBUGOUTPUT("Climb Part 1\n");
2192*01826a49SYabin Cui while(better) {
2193*01826a49SYabin Cui int offset;
2194*01826a49SYabin Cui size_t i, dist;
2195*01826a49SYabin Cui const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen;
2196*01826a49SYabin Cui better = 0;
2197*01826a49SYabin Cui DEBUGOUTPUT("Start\n");
2198*01826a49SYabin Cui cparam = winnerInfo.params;
2199*01826a49SYabin Cui candidateInfo.params = cparam;
2200*01826a49SYabin Cui /* all dist-1 candidates */
2201*01826a49SYabin Cui for (i = 0; i < varLen; i++) {
2202*01826a49SYabin Cui for (offset = -1; offset <= 1; offset += 2) {
2203*01826a49SYabin Cui CHECKTIME(winnerInfo);
2204*01826a49SYabin Cui candidateInfo.params = cparam;
2205*01826a49SYabin Cui paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i],
2206*01826a49SYabin Cui offset,
2207*01826a49SYabin Cui &candidateInfo.params);
2208*01826a49SYabin Cui
2209*01826a49SYabin Cui if(paramValid(candidateInfo.params)) {
2210*01826a49SYabin Cui int res;
2211*01826a49SYabin Cui res = benchMemo(&candidateInfo.result, buf, ctx,
2212*01826a49SYabin Cui sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas);
2213*01826a49SYabin Cui DEBUGOUTPUT("Res: %d\n", res);
2214*01826a49SYabin Cui if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */
2215*01826a49SYabin Cui winnerInfo = candidateInfo;
2216*01826a49SYabin Cui better = 1;
2217*01826a49SYabin Cui if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2218*01826a49SYabin Cui bestFeasible1 = winnerInfo;
2219*01826a49SYabin Cui }
2220*01826a49SYabin Cui }
2221*01826a49SYabin Cui }
2222*01826a49SYabin Cui } /* for (offset = -1; offset <= 1; offset += 2) */
2223*01826a49SYabin Cui } /* for (i = 0; i < varLen; i++) */
2224*01826a49SYabin Cui
2225*01826a49SYabin Cui if(better) {
2226*01826a49SYabin Cui continue;
2227*01826a49SYabin Cui }
2228*01826a49SYabin Cui
2229*01826a49SYabin Cui for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */
2230*01826a49SYabin Cui for (i = 0; i < (1ULL << varLen) / varLen + 2; i++) {
2231*01826a49SYabin Cui int res;
2232*01826a49SYabin Cui CHECKTIME(winnerInfo);
2233*01826a49SYabin Cui candidateInfo.params = cparam;
2234*01826a49SYabin Cui /* param error checking already done here */
2235*01826a49SYabin Cui paramVariation(&candidateInfo.params, mtAll, (U32)dist);
2236*01826a49SYabin Cui
2237*01826a49SYabin Cui res = benchMemo(&candidateInfo.result,
2238*01826a49SYabin Cui buf, ctx,
2239*01826a49SYabin Cui sanitizeParams(candidateInfo.params), target,
2240*01826a49SYabin Cui &winnerInfo.result, mtAll, feas);
2241*01826a49SYabin Cui DEBUGOUTPUT("Res: %d\n", res);
2242*01826a49SYabin Cui if (res == BETTER_RESULT) { /* synonymous with better in this case*/
2243*01826a49SYabin Cui winnerInfo = candidateInfo;
2244*01826a49SYabin Cui better = 1;
2245*01826a49SYabin Cui if (compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2246*01826a49SYabin Cui bestFeasible1 = winnerInfo;
2247*01826a49SYabin Cui }
2248*01826a49SYabin Cui break;
2249*01826a49SYabin Cui }
2250*01826a49SYabin Cui }
2251*01826a49SYabin Cui
2252*01826a49SYabin Cui if (better) {
2253*01826a49SYabin Cui break;
2254*01826a49SYabin Cui }
2255*01826a49SYabin Cui } /* for(dist = 2; dist < varLen + 2; dist++) */
2256*01826a49SYabin Cui
2257*01826a49SYabin Cui if (!better) { /* infeas -> feas -> stop */
2258*01826a49SYabin Cui if (feas) return winnerInfo;
2259*01826a49SYabin Cui feas = 1;
2260*01826a49SYabin Cui better = 1;
2261*01826a49SYabin Cui winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */
2262*01826a49SYabin Cui DEBUGOUTPUT("Climb Part 2\n");
2263*01826a49SYabin Cui }
2264*01826a49SYabin Cui }
2265*01826a49SYabin Cui winnerInfo = bestFeasible1;
2266*01826a49SYabin Cui }
2267*01826a49SYabin Cui
2268*01826a49SYabin Cui return winnerInfo;
2269*01826a49SYabin Cui }
2270*01826a49SYabin Cui
2271*01826a49SYabin Cui /* Optimizes for a fixed strategy */
2272*01826a49SYabin Cui
2273*01826a49SYabin Cui /* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited)
2274*01826a49SYabin Cui weight more on visit for bad results, less on good results/more on later results / ones with more failures.
2275*01826a49SYabin Cui allocate memoTable here.
2276*01826a49SYabin Cui */
2277*01826a49SYabin Cui static winnerInfo_t
optimizeFixedStrategy(const buffers_t buf,const contexts_t ctx,const constraint_t target,paramValues_t paramTarget,const ZSTD_strategy strat,memoTable_t * memoTableArray,const int tries)2278*01826a49SYabin Cui optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx,
2279*01826a49SYabin Cui const constraint_t target, paramValues_t paramTarget,
2280*01826a49SYabin Cui const ZSTD_strategy strat,
2281*01826a49SYabin Cui memoTable_t* memoTableArray, const int tries)
2282*01826a49SYabin Cui {
2283*01826a49SYabin Cui int i = 0;
2284*01826a49SYabin Cui
2285*01826a49SYabin Cui paramValues_t init;
2286*01826a49SYabin Cui winnerInfo_t winnerInfo, candidateInfo;
2287*01826a49SYabin Cui winnerInfo = initWinnerInfo(emptyParams());
2288*01826a49SYabin Cui /* so climb is given the right fixed strategy */
2289*01826a49SYabin Cui paramTarget.vals[strt_ind] = strat;
2290*01826a49SYabin Cui /* to pass ZSTD_checkCParams */
2291*01826a49SYabin Cui paramTarget = cParamUnsetMin(paramTarget);
2292*01826a49SYabin Cui
2293*01826a49SYabin Cui init = paramTarget;
2294*01826a49SYabin Cui
2295*01826a49SYabin Cui for(i = 0; i < tries; i++) {
2296*01826a49SYabin Cui DEBUGOUTPUT("Restart\n");
2297*01826a49SYabin Cui do {
2298*01826a49SYabin Cui randomConstrainedParams(&init, memoTableArray, strat);
2299*01826a49SYabin Cui } while(redundantParams(init, target, buf.maxBlockSize));
2300*01826a49SYabin Cui candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init);
2301*01826a49SYabin Cui if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) {
2302*01826a49SYabin Cui winnerInfo = candidateInfo;
2303*01826a49SYabin Cui BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize);
2304*01826a49SYabin Cui i = 0;
2305*01826a49SYabin Cui continue;
2306*01826a49SYabin Cui }
2307*01826a49SYabin Cui CHECKTIME(winnerInfo);
2308*01826a49SYabin Cui i++;
2309*01826a49SYabin Cui }
2310*01826a49SYabin Cui return winnerInfo;
2311*01826a49SYabin Cui }
2312*01826a49SYabin Cui
2313*01826a49SYabin Cui /* goes best, best-1, best+1, best-2, ... */
2314*01826a49SYabin Cui /* return 0 if nothing remaining */
nextStrategy(const int currentStrategy,const int bestStrategy)2315*01826a49SYabin Cui static int nextStrategy(const int currentStrategy, const int bestStrategy)
2316*01826a49SYabin Cui {
2317*01826a49SYabin Cui if(bestStrategy <= currentStrategy) {
2318*01826a49SYabin Cui int candidate = 2 * bestStrategy - currentStrategy - 1;
2319*01826a49SYabin Cui if(candidate < 1) {
2320*01826a49SYabin Cui candidate = currentStrategy + 1;
2321*01826a49SYabin Cui if(candidate > (int)ZSTD_STRATEGY_MAX) {
2322*01826a49SYabin Cui return 0;
2323*01826a49SYabin Cui } else {
2324*01826a49SYabin Cui return candidate;
2325*01826a49SYabin Cui }
2326*01826a49SYabin Cui } else {
2327*01826a49SYabin Cui return candidate;
2328*01826a49SYabin Cui }
2329*01826a49SYabin Cui } else { /* bestStrategy >= currentStrategy */
2330*01826a49SYabin Cui int candidate = 2 * bestStrategy - currentStrategy;
2331*01826a49SYabin Cui if(candidate > (int)ZSTD_STRATEGY_MAX) {
2332*01826a49SYabin Cui candidate = currentStrategy - 1;
2333*01826a49SYabin Cui if(candidate < 1) {
2334*01826a49SYabin Cui return 0;
2335*01826a49SYabin Cui } else {
2336*01826a49SYabin Cui return candidate;
2337*01826a49SYabin Cui }
2338*01826a49SYabin Cui } else {
2339*01826a49SYabin Cui return candidate;
2340*01826a49SYabin Cui }
2341*01826a49SYabin Cui }
2342*01826a49SYabin Cui }
2343*01826a49SYabin Cui
2344*01826a49SYabin Cui /* experiment with playing with this and decay value */
2345*01826a49SYabin Cui
2346*01826a49SYabin Cui /* main fn called when using --optimize */
2347*01826a49SYabin Cui /* Does strategy selection by benchmarking default compression levels
2348*01826a49SYabin Cui * then optimizes by strategy, starting with the best one and moving
 * progressively further away by number
2350*01826a49SYabin Cui * args:
2351*01826a49SYabin Cui * fileNamesTable - list of files to benchmark
2352*01826a49SYabin Cui * nbFiles - length of fileNamesTable
2353*01826a49SYabin Cui * dictFileName - name of dictionary file if one, else NULL
2354*01826a49SYabin Cui * target - performance constraints (cSpeed, dSpeed, cMem)
2355*01826a49SYabin Cui * paramTarget - parameter constraints (i.e. restriction search space to where strategy = ZSTD_fast)
2356*01826a49SYabin Cui * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio)
2357*01826a49SYabin Cui */
2358*01826a49SYabin Cui
/* default is 5. NOTE(review): presumably the number of tries granted to the
 * per-strategy optimizer - the consumer is not visible here, confirm */
static unsigned g_maxTries = 5u;
/* NOTE(review): presumably the amount removed from the number of tries between
 * strategies - confirm against its use */
#define TRY_DECAY 1
2361*01826a49SYabin Cui
2362*01826a49SYabin Cui static int
optimizeForSize(const char * const * const fileNamesTable,const size_t nbFiles,const char * dictFileName,constraint_t target,paramValues_t paramTarget,const int cLevelOpt,const int cLevelRun,const U32 memoTableLog)2363*01826a49SYabin Cui optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles,
2364*01826a49SYabin Cui const char* dictFileName,
2365*01826a49SYabin Cui constraint_t target, paramValues_t paramTarget,
2366*01826a49SYabin Cui const int cLevelOpt, const int cLevelRun,
2367*01826a49SYabin Cui const U32 memoTableLog)
2368*01826a49SYabin Cui {
2369*01826a49SYabin Cui varInds_t varArray [NUM_PARAMS];
2370*01826a49SYabin Cui int ret = 0;
2371*01826a49SYabin Cui const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL);
2372*01826a49SYabin Cui winnerInfo_t winner = initWinnerInfo(emptyParams());
2373*01826a49SYabin Cui memoTable_t* allMT = NULL;
2374*01826a49SYabin Cui paramValues_t paramBase;
2375*01826a49SYabin Cui contexts_t ctx;
2376*01826a49SYabin Cui buffers_t buf;
2377*01826a49SYabin Cui g_time = UTIL_getTime();
2378*01826a49SYabin Cui
2379*01826a49SYabin Cui if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2380*01826a49SYabin Cui DISPLAY("unable to load files\n");
2381*01826a49SYabin Cui return 1;
2382*01826a49SYabin Cui }
2383*01826a49SYabin Cui
2384*01826a49SYabin Cui if (createContexts(&ctx, dictFileName)) {
2385*01826a49SYabin Cui DISPLAY("unable to load dictionary\n");
2386*01826a49SYabin Cui freeBuffers(buf);
2387*01826a49SYabin Cui return 2;
2388*01826a49SYabin Cui }
2389*01826a49SYabin Cui
2390*01826a49SYabin Cui if (nbFiles == 1) {
2391*01826a49SYabin Cui DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[0]);
2392*01826a49SYabin Cui } else {
2393*01826a49SYabin Cui DISPLAYLEVEL(2, "Loading %lu Files... \r", (unsigned long)nbFiles);
2394*01826a49SYabin Cui }
2395*01826a49SYabin Cui
2396*01826a49SYabin Cui /* sanitize paramTarget */
2397*01826a49SYabin Cui optimizerAdjustInput(¶mTarget, buf.maxBlockSize);
2398*01826a49SYabin Cui paramBase = cParamUnsetMin(paramTarget);
2399*01826a49SYabin Cui
2400*01826a49SYabin Cui allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog);
2401*01826a49SYabin Cui
2402*01826a49SYabin Cui if (!allMT) {
2403*01826a49SYabin Cui DISPLAY("MemoTable Init Error\n");
2404*01826a49SYabin Cui ret = 2;
2405*01826a49SYabin Cui goto _cleanUp;
2406*01826a49SYabin Cui }
2407*01826a49SYabin Cui
2408*01826a49SYabin Cui /* default strictnesses */
2409*01826a49SYabin Cui if (g_strictness == PARAM_UNSET) {
2410*01826a49SYabin Cui if(g_optmode) {
2411*01826a49SYabin Cui g_strictness = 100;
2412*01826a49SYabin Cui } else {
2413*01826a49SYabin Cui g_strictness = 90;
2414*01826a49SYabin Cui }
2415*01826a49SYabin Cui } else {
2416*01826a49SYabin Cui if(0 >= g_strictness || g_strictness > 100) {
2417*01826a49SYabin Cui DISPLAY("Strictness Outside of Bounds\n");
2418*01826a49SYabin Cui ret = 4;
2419*01826a49SYabin Cui goto _cleanUp;
2420*01826a49SYabin Cui }
2421*01826a49SYabin Cui }
2422*01826a49SYabin Cui
2423*01826a49SYabin Cui /* use level'ing mode instead of normal target mode */
2424*01826a49SYabin Cui if (g_optmode) {
2425*01826a49SYabin Cui winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize));
2426*01826a49SYabin Cui if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) {
2427*01826a49SYabin Cui ret = 3;
2428*01826a49SYabin Cui goto _cleanUp;
2429*01826a49SYabin Cui }
2430*01826a49SYabin Cui
2431*01826a49SYabin Cui g_lvltarget = winner.result;
2432*01826a49SYabin Cui g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
2433*01826a49SYabin Cui g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
2434*01826a49SYabin Cui g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;
2435*01826a49SYabin Cui
2436*01826a49SYabin Cui target.cSpeed = (U32)g_lvltarget.cSpeed;
2437*01826a49SYabin Cui target.dSpeed = (U32)g_lvltarget.dSpeed;
2438*01826a49SYabin Cui
2439*01826a49SYabin Cui BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize);
2440*01826a49SYabin Cui }
2441*01826a49SYabin Cui
2442*01826a49SYabin Cui /* Don't want it to return anything worse than the best known result */
2443*01826a49SYabin Cui if (g_singleRun) {
2444*01826a49SYabin Cui BMK_benchResult_t res;
2445*01826a49SYabin Cui g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2446*01826a49SYabin Cui if (BMK_benchParam(&res, buf, ctx, g_params)) {
2447*01826a49SYabin Cui ret = 45;
2448*01826a49SYabin Cui goto _cleanUp;
2449*01826a49SYabin Cui }
2450*01826a49SYabin Cui if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) {
2451*01826a49SYabin Cui winner.result = res;
2452*01826a49SYabin Cui winner.params = g_params;
2453*01826a49SYabin Cui }
2454*01826a49SYabin Cui }
2455*01826a49SYabin Cui
2456*01826a49SYabin Cui /* bench */
2457*01826a49SYabin Cui DISPLAYLEVEL(2, "\r%79s\r", "");
2458*01826a49SYabin Cui if(nbFiles == 1) {
2459*01826a49SYabin Cui DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]);
2460*01826a49SYabin Cui } else {
2461*01826a49SYabin Cui DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned long)nbFiles);
2462*01826a49SYabin Cui }
2463*01826a49SYabin Cui
2464*01826a49SYabin Cui if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); }
2465*01826a49SYabin Cui if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); }
2466*01826a49SYabin Cui if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); }
2467*01826a49SYabin Cui
2468*01826a49SYabin Cui DISPLAYLEVEL(2, "\n");
2469*01826a49SYabin Cui init_clockGranularity();
2470*01826a49SYabin Cui
2471*01826a49SYabin Cui { paramValues_t CParams;
2472*01826a49SYabin Cui
2473*01826a49SYabin Cui /* find best solution from default params */
2474*01826a49SYabin Cui { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2475*01826a49SYabin Cui DEBUGOUTPUT("Strategy Selection\n");
2476*01826a49SYabin Cui if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2477*01826a49SYabin Cui BMK_benchResult_t candidate;
2478*01826a49SYabin Cui int i;
2479*01826a49SYabin Cui for (i=1; i<=maxSeeds; i++) {
2480*01826a49SYabin Cui int ec;
2481*01826a49SYabin Cui CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget);
2482*01826a49SYabin Cui ec = BMK_benchParam(&candidate, buf, ctx, CParams);
2483*01826a49SYabin Cui BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize);
2484*01826a49SYabin Cui
2485*01826a49SYabin Cui if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) {
2486*01826a49SYabin Cui winner.result = candidate;
2487*01826a49SYabin Cui winner.params = CParams;
2488*01826a49SYabin Cui }
2489*01826a49SYabin Cui
2490*01826a49SYabin Cui CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */
2491*01826a49SYabin Cui /* if the current params are too slow, just stop. */
2492*01826a49SYabin Cui if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; }
2493*01826a49SYabin Cui }
2494*01826a49SYabin Cui
2495*01826a49SYabin Cui BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize);
2496*01826a49SYabin Cui }
2497*01826a49SYabin Cui }
2498*01826a49SYabin Cui
2499*01826a49SYabin Cui DEBUGOUTPUT("Real Opt\n");
2500*01826a49SYabin Cui /* start 'real' optimization */
2501*01826a49SYabin Cui { int bestStrategy = (int)winner.params.vals[strt_ind];
2502*01826a49SYabin Cui if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2503*01826a49SYabin Cui int st = bestStrategy;
2504*01826a49SYabin Cui int tries = g_maxTries;
2505*01826a49SYabin Cui
2506*01826a49SYabin Cui /* one iterations of hill climbing with the level-defined parameters. */
2507*01826a49SYabin Cui { winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params);
2508*01826a49SYabin Cui if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) {
2509*01826a49SYabin Cui winner = w1;
2510*01826a49SYabin Cui }
2511*01826a49SYabin Cui CHECKTIMEGT(ret, 0, _displayCleanUp);
2512*01826a49SYabin Cui }
2513*01826a49SYabin Cui
2514*01826a49SYabin Cui while(st && tries > 0) {
2515*01826a49SYabin Cui winnerInfo_t wc;
2516*01826a49SYabin Cui DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]);
2517*01826a49SYabin Cui
2518*01826a49SYabin Cui wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries);
2519*01826a49SYabin Cui
2520*01826a49SYabin Cui if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) {
2521*01826a49SYabin Cui winner = wc;
2522*01826a49SYabin Cui tries = g_maxTries;
2523*01826a49SYabin Cui bestStrategy = st;
2524*01826a49SYabin Cui } else {
2525*01826a49SYabin Cui st = nextStrategy(st, bestStrategy);
2526*01826a49SYabin Cui tries -= TRY_DECAY;
2527*01826a49SYabin Cui }
2528*01826a49SYabin Cui CHECKTIMEGT(ret, 0, _displayCleanUp);
2529*01826a49SYabin Cui }
2530*01826a49SYabin Cui } else {
2531*01826a49SYabin Cui winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries);
2532*01826a49SYabin Cui }
2533*01826a49SYabin Cui
2534*01826a49SYabin Cui }
2535*01826a49SYabin Cui
2536*01826a49SYabin Cui /* no solution found */
2537*01826a49SYabin Cui if(winner.result.cSize == (size_t)-1) {
2538*01826a49SYabin Cui ret = 1;
2539*01826a49SYabin Cui DISPLAY("No feasible solution found\n");
2540*01826a49SYabin Cui goto _cleanUp;
2541*01826a49SYabin Cui }
2542*01826a49SYabin Cui
2543*01826a49SYabin Cui /* end summary */
2544*01826a49SYabin Cui _displayCleanUp:
2545*01826a49SYabin Cui if (g_displayLevel >= 0) {
2546*01826a49SYabin Cui BMK_displayOneResult(stdout, winner, buf.srcSize);
2547*01826a49SYabin Cui }
2548*01826a49SYabin Cui BMK_paramValues_into_commandLine(stdout, winner.params);
2549*01826a49SYabin Cui DISPLAYLEVEL(1, "grillParams size - optimizer completed \n");
2550*01826a49SYabin Cui }
2551*01826a49SYabin Cui
2552*01826a49SYabin Cui _cleanUp:
2553*01826a49SYabin Cui freeContexts(ctx);
2554*01826a49SYabin Cui freeBuffers(buf);
2555*01826a49SYabin Cui freeMemoTableArray(allMT);
2556*01826a49SYabin Cui return ret;
2557*01826a49SYabin Cui }
2558*01826a49SYabin Cui
2559*01826a49SYabin Cui /*-************************************
2560*01826a49SYabin Cui * CLI parsing functions
2561*01826a49SYabin Cui **************************************/
2562*01826a49SYabin Cui
/** longCommandWArg() :
 *  Tests whether the string at *stringPtr begins with longCommand.
 *  On a match, moves *stringPtr just past longCommand and @return 1.
 *  Otherwise leaves *stringPtr untouched and @return 0.
 *  from zstdcli.c
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }
    *stringPtr += prefixLen;
    return 1;
}
2576*01826a49SYabin Cui
/* errorOut() :
 * displays `msg`, then terminates the program with exit code 1.
 * This function does not return. */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
2581*01826a49SYabin Cui
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  A leading '-' is accepted : the value is then negated modulo 2^N (unsigned wrap-around).
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Note : function will exit() program if digit sequence overflows */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char tooLarge[] = "error: numeric value too large";
    unsigned const maxBeforeDigit = (((unsigned)(-1)) / 10) - 1;
    unsigned const maxBeforeShift = ((unsigned)(-1)) >> 10;
    unsigned magnitude = 0;
    int negative = 0;

    if (**stringPtr == '-') {
        negative = 1;
        (*stringPtr)++;
    }

    /* decimal digits */
    while ((**stringPtr >= '0') && (**stringPtr <= '9')) {
        if (magnitude > maxBeforeDigit) { errorOut(tooLarge); }
        magnitude *= 10;
        magnitude += (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }

    /* optional binary multiplier : 'K' scales by 2^10, 'M' by 2^20 */
    {   char const unit = **stringPtr;
        unsigned steps = (unit == 'M') ? 2 : ((unit == 'K') ? 1 : 0);
        if (steps != 0) {
            for ( ; steps > 0; steps--) {
                if (magnitude > maxBeforeShift) { errorOut(tooLarge); }
                magnitude <<= 10;
            }
            (*stringPtr)++;   /* skip `K` or `M` */
            if (**stringPtr == 'i') { (*stringPtr)++; }
            if (**stringPtr == 'B') { (*stringPtr)++; }
        }
    }

    return negative ? (0U - magnitude) : magnitude;
}
2615*01826a49SYabin Cui
/* readDoubleFromChar() :
 * @return : non-negative decimal value read from *stringPtr,
 *  formatted as digits, optionally followed by '.' and more digits.
 *  No sign and no exponent are interpreted.
 *  Advances *stringPtr to the first character which was not consumed. */
static double readDoubleFromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    double value = 0;
    double scale = 10;

    /* integer part */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value *= 10;
        value += *cursor - '0';
    }

    /* fractional part, only if a decimal point follows */
    if (*cursor == '.') {
        cursor++;
        for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
            value += (double)(*cursor - '0') / scale;
            scale *= 10;
        }
    }

    *stringPtr = cursor;
    return value;
}
2631*01826a49SYabin Cui
/* usage() :
 * displays the short help text; `exename` is inserted into the usage line.
 * @return : always 0 */
static int usage(const char* exename)
{
    DISPLAY("Usage :\n");
    DISPLAY(" %s [arg] file\n", exename);
    DISPLAY("Arguments :\n");
    DISPLAY(" file : path to the file used as reference (if none, generates a compressible sample)\n");
    DISPLAY(" -H/-h : Help (this text + advanced options)\n");

    return 0;
}
2641*01826a49SYabin Cui
usage_advanced(void)2642*01826a49SYabin Cui static int usage_advanced(void)
2643*01826a49SYabin Cui {
2644*01826a49SYabin Cui DISPLAY( "\nAdvanced options :\n");
2645*01826a49SYabin Cui DISPLAY( " -T# : set level 1 speed objective \n");
2646*01826a49SYabin Cui DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n");
2647*01826a49SYabin Cui DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n");
2648*01826a49SYabin Cui DISPLAY( " -S : Single run \n");
2649*01826a49SYabin Cui DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n");
2650*01826a49SYabin Cui DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100);
2651*01826a49SYabin Cui DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n",
2652*01826a49SYabin Cui (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600);
2653*01826a49SYabin Cui DISPLAY( " -v : Prints Benchmarking output\n");
2654*01826a49SYabin Cui DISPLAY( " -D : Next argument dictionary file\n");
2655*01826a49SYabin Cui DISPLAY( " -s : Separate Files\n");
2656*01826a49SYabin Cui return 0;
2657*01826a49SYabin Cui }
2658*01826a49SYabin Cui
/* badusage() :
 * reports invalid command line parameters, then displays the short help text.
 * @return : always 1, usable directly as the program's exit code */
static int badusage(const char* exename)
{
    DISPLAY("Wrong parameters\n");
    (void)usage(exename);

    return 1;
}
2665*01826a49SYabin Cui
/* PARSE_SUB_ARGS() :
 * Must be expanded inside a loop which has a `const char* argument` in scope.
 * If `argument` starts with stringLong or stringShort, the name is consumed
 * and the number which follows is stored into `variable`.
 * Then, a ',' is skipped and the enclosing loop is `continue`d ;
 * any other character `break`s out of the enclosing loop.
 * When neither name matches, nothing happens and execution falls through.
 * Because it relies on `break` / `continue` reaching the enclosing loop,
 * the body cannot be wrapped into `do { } while (0)`. */
#define PARSE_SUB_ARGS(stringLong, stringShort, variable) {   \
    if ( longCommandWArg(&argument, stringLong)               \
      || longCommandWArg(&argument, stringShort) ) {          \
        variable = readU32FromChar(&argument);                \
        if (argument[0] != ',') break;                        \
        argument++;                                           \
        continue;                                             \
    } }
2674*01826a49SYabin Cui
2675*01826a49SYabin Cui /* 1 if successful parse, 0 otherwise */
parse_params(const char ** argptr,paramValues_t * pv)2676*01826a49SYabin Cui static int parse_params(const char** argptr, paramValues_t* pv) {
2677*01826a49SYabin Cui int matched = 0;
2678*01826a49SYabin Cui const char* argOrig = *argptr;
2679*01826a49SYabin Cui varInds_t v;
2680*01826a49SYabin Cui for(v = 0; v < NUM_PARAMS; v++) {
2681*01826a49SYabin Cui if ( longCommandWArg(argptr,g_shortParamNames[v])
2682*01826a49SYabin Cui || longCommandWArg(argptr, g_paramNames[v]) ) {
2683*01826a49SYabin Cui if(**argptr == '=') {
2684*01826a49SYabin Cui (*argptr)++;
2685*01826a49SYabin Cui pv->vals[v] = readU32FromChar(argptr);
2686*01826a49SYabin Cui matched = 1;
2687*01826a49SYabin Cui break;
2688*01826a49SYabin Cui }
2689*01826a49SYabin Cui }
2690*01826a49SYabin Cui /* reset and try again */
2691*01826a49SYabin Cui *argptr = argOrig;
2692*01826a49SYabin Cui }
2693*01826a49SYabin Cui return matched;
2694*01826a49SYabin Cui }
2695*01826a49SYabin Cui
2696*01826a49SYabin Cui /*-************************************
2697*01826a49SYabin Cui * Main
2698*01826a49SYabin Cui **************************************/
2699*01826a49SYabin Cui
main(int argc,const char ** argv)2700*01826a49SYabin Cui int main(int argc, const char** argv)
2701*01826a49SYabin Cui {
2702*01826a49SYabin Cui int i,
2703*01826a49SYabin Cui filenamesStart=0,
2704*01826a49SYabin Cui result;
2705*01826a49SYabin Cui const char* exename=argv[0];
2706*01826a49SYabin Cui const char* input_filename = NULL;
2707*01826a49SYabin Cui const char* dictFileName = NULL;
2708*01826a49SYabin Cui U32 main_pause = 0;
2709*01826a49SYabin Cui int cLevelOpt = 0, cLevelRun = 0;
2710*01826a49SYabin Cui int separateFiles = 0;
2711*01826a49SYabin Cui double compressibility = COMPRESSIBILITY_DEFAULT;
2712*01826a49SYabin Cui U32 memoTableLog = PARAM_UNSET;
2713*01826a49SYabin Cui constraint_t target = { 0, 0, (U32)-1 };
2714*01826a49SYabin Cui
2715*01826a49SYabin Cui paramValues_t paramTarget = emptyParams();
2716*01826a49SYabin Cui g_params = emptyParams();
2717*01826a49SYabin Cui
2718*01826a49SYabin Cui assert(argc>=1); /* for exename */
2719*01826a49SYabin Cui
2720*01826a49SYabin Cui for(i=1; i<argc; i++) {
2721*01826a49SYabin Cui const char* argument = argv[i];
2722*01826a49SYabin Cui DEBUGOUTPUT("%d: %s\n", i, argument);
2723*01826a49SYabin Cui assert(argument != NULL);
2724*01826a49SYabin Cui
2725*01826a49SYabin Cui if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
2726*01826a49SYabin Cui
2727*01826a49SYabin Cui if (longCommandWArg(&argument, "--optimize=")) {
2728*01826a49SYabin Cui g_optimizer = 1;
2729*01826a49SYabin Cui for ( ; ;) {
2730*01826a49SYabin Cui if(parse_params(&argument, ¶mTarget)) { if(argument[0] == ',') { argument++; continue; } else break; }
2731*01826a49SYabin Cui PARSE_SUB_ARGS("compressionSpeed=" , "cSpeed=", target.cSpeed);
2732*01826a49SYabin Cui PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed);
2733*01826a49SYabin Cui PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem);
2734*01826a49SYabin Cui PARSE_SUB_ARGS("strict=", "stc=", g_strictness);
2735*01826a49SYabin Cui PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries);
2736*01826a49SYabin Cui PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog);
2737*01826a49SYabin Cui if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; }
2738*01826a49SYabin Cui if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; }
2739*01826a49SYabin Cui
2740*01826a49SYabin Cui DISPLAY("invalid optimization parameter \n");
2741*01826a49SYabin Cui return 1;
2742*01826a49SYabin Cui }
2743*01826a49SYabin Cui
2744*01826a49SYabin Cui if (argument[0] != 0) {
2745*01826a49SYabin Cui DISPLAY("invalid --optimize= format\n");
2746*01826a49SYabin Cui return 1; /* check the end of string */
2747*01826a49SYabin Cui }
2748*01826a49SYabin Cui continue;
2749*01826a49SYabin Cui } else if (longCommandWArg(&argument, "--zstd=")) {
2750*01826a49SYabin Cui /* Decode command (note : aggregated commands are allowed) */
2751*01826a49SYabin Cui g_singleRun = 1;
2752*01826a49SYabin Cui for ( ; ;) {
2753*01826a49SYabin Cui if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; }
2754*01826a49SYabin Cui if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; }
2755*01826a49SYabin Cui
2756*01826a49SYabin Cui DISPLAY("invalid compression parameter \n");
2757*01826a49SYabin Cui return 1;
2758*01826a49SYabin Cui }
2759*01826a49SYabin Cui
2760*01826a49SYabin Cui if (argument[0] != 0) {
2761*01826a49SYabin Cui DISPLAY("invalid --zstd= format\n");
2762*01826a49SYabin Cui return 1; /* check the end of string */
2763*01826a49SYabin Cui }
2764*01826a49SYabin Cui continue;
2765*01826a49SYabin Cui /* if not return, success */
2766*01826a49SYabin Cui
2767*01826a49SYabin Cui } else if (longCommandWArg(&argument, "--display=")) {
2768*01826a49SYabin Cui /* Decode command (note : aggregated commands are allowed) */
2769*01826a49SYabin Cui memset(g_silenceParams, 1, sizeof(g_silenceParams));
2770*01826a49SYabin Cui for ( ; ;) {
2771*01826a49SYabin Cui int found = 0;
2772*01826a49SYabin Cui varInds_t v;
2773*01826a49SYabin Cui for(v = 0; v < NUM_PARAMS; v++) {
2774*01826a49SYabin Cui if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) {
2775*01826a49SYabin Cui g_silenceParams[v] = 0;
2776*01826a49SYabin Cui found = 1;
2777*01826a49SYabin Cui }
2778*01826a49SYabin Cui }
2779*01826a49SYabin Cui if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) {
2780*01826a49SYabin Cui for(v = 0; v <= strt_ind; v++) {
2781*01826a49SYabin Cui g_silenceParams[v] = 0;
2782*01826a49SYabin Cui }
2783*01826a49SYabin Cui found = 1;
2784*01826a49SYabin Cui }
2785*01826a49SYabin Cui
2786*01826a49SYabin Cui
2787*01826a49SYabin Cui if(found) {
2788*01826a49SYabin Cui if(argument[0]==',') {
2789*01826a49SYabin Cui continue;
2790*01826a49SYabin Cui } else {
2791*01826a49SYabin Cui break;
2792*01826a49SYabin Cui }
2793*01826a49SYabin Cui }
2794*01826a49SYabin Cui DISPLAY("invalid parameter name parameter \n");
2795*01826a49SYabin Cui return 1;
2796*01826a49SYabin Cui }
2797*01826a49SYabin Cui
2798*01826a49SYabin Cui if (argument[0] != 0) {
2799*01826a49SYabin Cui DISPLAY("invalid --display format\n");
2800*01826a49SYabin Cui return 1; /* check the end of string */
2801*01826a49SYabin Cui }
2802*01826a49SYabin Cui continue;
2803*01826a49SYabin Cui } else if (argument[0]=='-') {
2804*01826a49SYabin Cui argument++;
2805*01826a49SYabin Cui
2806*01826a49SYabin Cui while (argument[0]!=0) {
2807*01826a49SYabin Cui
2808*01826a49SYabin Cui switch(argument[0])
2809*01826a49SYabin Cui {
2810*01826a49SYabin Cui /* Display help on usage */
2811*01826a49SYabin Cui case 'h' :
2812*01826a49SYabin Cui case 'H': usage(exename); usage_advanced(); return 0;
2813*01826a49SYabin Cui
2814*01826a49SYabin Cui /* Pause at the end (hidden option) */
2815*01826a49SYabin Cui case 'p': main_pause = 1; argument++; break;
2816*01826a49SYabin Cui
2817*01826a49SYabin Cui /* Sample compressibility (when no file provided) */
2818*01826a49SYabin Cui case 'P':
2819*01826a49SYabin Cui argument++;
2820*01826a49SYabin Cui { U32 const proba32 = readU32FromChar(&argument);
2821*01826a49SYabin Cui compressibility = (double)proba32 / 100.;
2822*01826a49SYabin Cui }
2823*01826a49SYabin Cui break;
2824*01826a49SYabin Cui
2825*01826a49SYabin Cui /* Run Single conf */
2826*01826a49SYabin Cui case 'S':
2827*01826a49SYabin Cui g_singleRun = 1;
2828*01826a49SYabin Cui argument++;
2829*01826a49SYabin Cui for ( ; ; ) {
2830*01826a49SYabin Cui switch(*argument)
2831*01826a49SYabin Cui {
2832*01826a49SYabin Cui case 'w':
2833*01826a49SYabin Cui argument++;
2834*01826a49SYabin Cui g_params.vals[wlog_ind] = readU32FromChar(&argument);
2835*01826a49SYabin Cui continue;
2836*01826a49SYabin Cui case 'c':
2837*01826a49SYabin Cui argument++;
2838*01826a49SYabin Cui g_params.vals[clog_ind] = readU32FromChar(&argument);
2839*01826a49SYabin Cui continue;
2840*01826a49SYabin Cui case 'h':
2841*01826a49SYabin Cui argument++;
2842*01826a49SYabin Cui g_params.vals[hlog_ind] = readU32FromChar(&argument);
2843*01826a49SYabin Cui continue;
2844*01826a49SYabin Cui case 's':
2845*01826a49SYabin Cui argument++;
2846*01826a49SYabin Cui g_params.vals[slog_ind] = readU32FromChar(&argument);
2847*01826a49SYabin Cui continue;
2848*01826a49SYabin Cui case 'l': /* search length */
2849*01826a49SYabin Cui argument++;
2850*01826a49SYabin Cui g_params.vals[mml_ind] = readU32FromChar(&argument);
2851*01826a49SYabin Cui continue;
2852*01826a49SYabin Cui case 't': /* target length */
2853*01826a49SYabin Cui argument++;
2854*01826a49SYabin Cui g_params.vals[tlen_ind] = readU32FromChar(&argument);
2855*01826a49SYabin Cui continue;
2856*01826a49SYabin Cui case 'S': /* strategy */
2857*01826a49SYabin Cui argument++;
2858*01826a49SYabin Cui g_params.vals[strt_ind] = readU32FromChar(&argument);
2859*01826a49SYabin Cui continue;
2860*01826a49SYabin Cui case 'f': /* forceAttachDict */
2861*01826a49SYabin Cui argument++;
2862*01826a49SYabin Cui g_params.vals[fadt_ind] = readU32FromChar(&argument);
2863*01826a49SYabin Cui continue;
2864*01826a49SYabin Cui case 'L':
2865*01826a49SYabin Cui { argument++;
2866*01826a49SYabin Cui cLevelRun = (int)readU32FromChar(&argument);
2867*01826a49SYabin Cui g_params = emptyParams();
2868*01826a49SYabin Cui continue;
2869*01826a49SYabin Cui }
2870*01826a49SYabin Cui default : ;
2871*01826a49SYabin Cui }
2872*01826a49SYabin Cui break;
2873*01826a49SYabin Cui }
2874*01826a49SYabin Cui
2875*01826a49SYabin Cui break;
2876*01826a49SYabin Cui
2877*01826a49SYabin Cui /* target level1 speed objective, in MB/s */
2878*01826a49SYabin Cui case 'T':
2879*01826a49SYabin Cui argument++;
2880*01826a49SYabin Cui g_target = readU32FromChar(&argument);
2881*01826a49SYabin Cui break;
2882*01826a49SYabin Cui
2883*01826a49SYabin Cui /* cut input into blocks */
2884*01826a49SYabin Cui case 'B':
2885*01826a49SYabin Cui argument++;
2886*01826a49SYabin Cui g_blockSize = readU32FromChar(&argument);
2887*01826a49SYabin Cui DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10));
2888*01826a49SYabin Cui break;
2889*01826a49SYabin Cui
2890*01826a49SYabin Cui /* caps runtime (in seconds) */
2891*01826a49SYabin Cui case 't':
2892*01826a49SYabin Cui argument++;
2893*01826a49SYabin Cui g_timeLimit_s = readU32FromChar(&argument);
2894*01826a49SYabin Cui break;
2895*01826a49SYabin Cui
2896*01826a49SYabin Cui case 's':
2897*01826a49SYabin Cui argument++;
2898*01826a49SYabin Cui separateFiles = 1;
2899*01826a49SYabin Cui break;
2900*01826a49SYabin Cui
2901*01826a49SYabin Cui case 'q':
2902*01826a49SYabin Cui while (argument[0] == 'q') { argument++; g_displayLevel--; }
2903*01826a49SYabin Cui break;
2904*01826a49SYabin Cui
2905*01826a49SYabin Cui case 'v':
2906*01826a49SYabin Cui while (argument[0] == 'v') { argument++; g_displayLevel++; }
2907*01826a49SYabin Cui break;
2908*01826a49SYabin Cui
2909*01826a49SYabin Cui /* load dictionary file (only applicable for optimizer rn) */
2910*01826a49SYabin Cui case 'D':
2911*01826a49SYabin Cui if(i == argc - 1) { /* last argument, return error. */
2912*01826a49SYabin Cui DISPLAY("Dictionary file expected but not given : %d\n", i);
2913*01826a49SYabin Cui return 1;
2914*01826a49SYabin Cui } else {
2915*01826a49SYabin Cui i++;
2916*01826a49SYabin Cui dictFileName = argv[i];
2917*01826a49SYabin Cui argument += strlen(argument);
2918*01826a49SYabin Cui }
2919*01826a49SYabin Cui break;
2920*01826a49SYabin Cui
2921*01826a49SYabin Cui /* Unknown command */
2922*01826a49SYabin Cui default : return badusage(exename);
2923*01826a49SYabin Cui }
2924*01826a49SYabin Cui }
2925*01826a49SYabin Cui continue;
2926*01826a49SYabin Cui } /* if (argument[0]=='-') */
2927*01826a49SYabin Cui
2928*01826a49SYabin Cui /* first provided filename is input */
2929*01826a49SYabin Cui if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
2930*01826a49SYabin Cui }
2931*01826a49SYabin Cui
2932*01826a49SYabin Cui /* Welcome message */
2933*01826a49SYabin Cui DISPLAYLEVEL(2, WELCOME_MESSAGE);
2934*01826a49SYabin Cui
2935*01826a49SYabin Cui if (filenamesStart==0) {
2936*01826a49SYabin Cui if (g_optimizer) {
2937*01826a49SYabin Cui DISPLAY("Optimizer Expects File\n");
2938*01826a49SYabin Cui return 1;
2939*01826a49SYabin Cui } else {
2940*01826a49SYabin Cui result = benchSample(compressibility, cLevelRun);
2941*01826a49SYabin Cui }
2942*01826a49SYabin Cui } else {
2943*01826a49SYabin Cui if(separateFiles) {
2944*01826a49SYabin Cui for(i = 0; i < argc - filenamesStart; i++) {
2945*01826a49SYabin Cui if (g_optimizer) {
2946*01826a49SYabin Cui result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2947*01826a49SYabin Cui if(result) { DISPLAY("Error on File %d", i); return result; }
2948*01826a49SYabin Cui } else {
2949*01826a49SYabin Cui result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun);
2950*01826a49SYabin Cui if(result) { DISPLAY("Error on File %d", i); return result; }
2951*01826a49SYabin Cui }
2952*01826a49SYabin Cui }
2953*01826a49SYabin Cui } else {
2954*01826a49SYabin Cui if (g_optimizer) {
2955*01826a49SYabin Cui assert(filenamesStart < argc);
2956*01826a49SYabin Cui result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2957*01826a49SYabin Cui } else {
2958*01826a49SYabin Cui result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun);
2959*01826a49SYabin Cui }
2960*01826a49SYabin Cui }
2961*01826a49SYabin Cui }
2962*01826a49SYabin Cui
2963*01826a49SYabin Cui if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
2964*01826a49SYabin Cui
2965*01826a49SYabin Cui return result;
2966*01826a49SYabin Cui }
2967