xref: /aosp_15_r20/external/zstd/programs/benchzstd.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui /* **************************************
12*01826a49SYabin Cui  *  Tuning parameters
13*01826a49SYabin Cui  ****************************************/
14*01826a49SYabin Cui #ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
15*01826a49SYabin Cui #    define BMK_TIMETEST_DEFAULT_S 3
16*01826a49SYabin Cui #endif
17*01826a49SYabin Cui 
18*01826a49SYabin Cui /* *************************************
19*01826a49SYabin Cui  *  Includes
20*01826a49SYabin Cui  ***************************************/
21*01826a49SYabin Cui /* this must be included first */
22*01826a49SYabin Cui #include "platform.h" /* Large Files support, compiler specifics */
23*01826a49SYabin Cui 
24*01826a49SYabin Cui /* then following system includes */
25*01826a49SYabin Cui #include <assert.h> /* assert */
26*01826a49SYabin Cui #include <errno.h>
27*01826a49SYabin Cui #include <stdio.h>    /* fprintf, fopen */
28*01826a49SYabin Cui #include <stdlib.h>   /* malloc, free */
29*01826a49SYabin Cui #include <string.h>   /* memset, strerror */
30*01826a49SYabin Cui #include "util.h"     /* UTIL_getFileSize, UTIL_sleep */
31*01826a49SYabin Cui #include "../lib/common/mem.h"
32*01826a49SYabin Cui #include "benchfn.h"
33*01826a49SYabin Cui #include "timefn.h" /* UTIL_time_t */
34*01826a49SYabin Cui #ifndef ZSTD_STATIC_LINKING_ONLY
35*01826a49SYabin Cui #    define ZSTD_STATIC_LINKING_ONLY
36*01826a49SYabin Cui #endif
37*01826a49SYabin Cui #include "../lib/zstd.h"
38*01826a49SYabin Cui #include "datagen.h" /* RDG_genBuffer */
39*01826a49SYabin Cui #include "lorem.h"   /* LOREM_genBuffer */
40*01826a49SYabin Cui #ifndef XXH_INLINE_ALL
41*01826a49SYabin Cui #    define XXH_INLINE_ALL
42*01826a49SYabin Cui #endif
43*01826a49SYabin Cui #include "../lib/common/xxhash.h"
44*01826a49SYabin Cui #include "../lib/zstd_errors.h"
45*01826a49SYabin Cui #include "benchzstd.h"
46*01826a49SYabin Cui 
47*01826a49SYabin Cui /* *************************************
48*01826a49SYabin Cui  *  Constants
49*01826a49SYabin Cui  ***************************************/
50*01826a49SYabin Cui #ifndef ZSTD_GIT_COMMIT
51*01826a49SYabin Cui #    define ZSTD_GIT_COMMIT_STRING ""
52*01826a49SYabin Cui #else
53*01826a49SYabin Cui #    define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
54*01826a49SYabin Cui #endif
55*01826a49SYabin Cui 
/* Durations. The ULL suffix makes the TIMELOOP_* and ACTIVEPERIOD_* values
 * unsigned long long constants. */
#define TIMELOOP_MICROSEC (1 * 1000000ULL)   /* 1 second, in microseconds */
#define TIMELOOP_NANOSEC (1 * 1000000000ULL) /* 1 second, in nanoseconds */
#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10

/* Size units, used as postfix multipliers :
 * `64 MB` expands to `64 *(1 << 20)`.
 * KB and MB multiply by an int, GB multiplies by an unsigned. */
#define KB *(1 << 10)
#define MB *(1 << 20)
#define GB *(1U << 30)

#define BMK_RUNTEST_DEFAULT_MS 1000

/* maxMemory :
 * (2 GB - 64 MB) when size_t is 4 bytes wide,
 * otherwise 1 << (bit width of size_t - 31), i.e. 8 GB for a 64-bit size_t. */
static const size_t maxMemory =
        (sizeof(size_t) == 4)
                ? (2 GB - 64 MB)                                  /* 32-bit */
                : (size_t)(1ULL << ((sizeof(size_t) * 8) - 31)); /* 64-bit */
72*01826a49SYabin Cui 
73*01826a49SYabin Cui /* *************************************
74*01826a49SYabin Cui  *  console display
75*01826a49SYabin Cui  ***************************************/
/* Console helpers.
 * DISPLAY() prints to stderr, OUTPUT() prints to stdout;
 * both call fflush(NULL) right after printing.
 * DISPLAYLEVEL() / OUTPUTLEVEL() only print when the `displayLevel` variable
 * visible at the call site is >= `level` :
 *   0 : no display;   1 : errors;   2 : + result + interaction + warnings;
 *   3 : + progression;   4 : + information
 * note : these macros expand to a bare block / if statement
 *        (not `do { } while (0)`), so they also work without a trailing ';'. */
#define DISPLAY(...)                  \
    {                                 \
        fprintf(stderr, __VA_ARGS__); \
        fflush(NULL);                 \
    }
#define DISPLAYLEVEL(level, ...)  \
    if (displayLevel >= level) {  \
        DISPLAY(__VA_ARGS__);     \
    }
#define OUTPUT(...)                   \
    {                                 \
        fprintf(stdout, __VA_ARGS__); \
        fflush(NULL);                 \
    }
#define OUTPUTLEVEL(level, ...)  \
    if (displayLevel >= level) { \
        OUTPUT(__VA_ARGS__);     \
    }
96*01826a49SYabin Cui 
97*01826a49SYabin Cui /* *************************************
98*01826a49SYabin Cui  *  Exceptions
99*01826a49SYabin Cui  ***************************************/
#ifndef DEBUG
#    define DEBUG 0
#endif

/* DEBUGOUTPUT() : forwards its arguments to DISPLAY() only when DEBUG is non-zero */
#define DEBUGOUTPUT(...)      \
    {                         \
        if (DEBUG)            \
            DISPLAY(__VA_ARGS__); \
    }

/* RETURN_ERROR_INT() :
 * reports "Error <errorNum> : <message>" at display level 1,
 * then makes the enclosing function return `errorNum`. */
#define RETURN_ERROR_INT(errorNum, ...)            \
    {                                              \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);  \
        DISPLAYLEVEL(1, __VA_ARGS__);              \
        DISPLAYLEVEL(1, " \n");                    \
        return errorNum;                           \
    }

/* CHECK_Z() :
 * evaluates `zf` once; when ZSTD_isError() flags the result,
 * prints the stringified expression with ZSTD_getErrorName(),
 * then terminates the process with exit(1). */
#define CHECK_Z(zf)                                              \
    {                                                            \
        size_t const zerr = zf;                                  \
        if (ZSTD_isError(zerr)) {                                \
            DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);       \
            DISPLAY("Error : ");                                 \
            DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \
            DISPLAY(" \n");                                      \
            exit(1);                                             \
        }                                                        \
    }

/* RETURN_ERROR() :
 * same report as RETURN_ERROR_INT(), but the enclosing function returns
 * a zeroed `retType` structure whose `tag` field is set to `errorNum`. */
#define RETURN_ERROR(errorNum, retType, ...)       \
    {                                              \
        retType r;                                 \
        memset(&r, 0, sizeof(retType));            \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);  \
        DISPLAYLEVEL(1, __VA_ARGS__);              \
        DISPLAYLEVEL(1, " \n");                    \
        r.tag = errorNum;                          \
        return r;                                  \
    }
141*01826a49SYabin Cui 
/* formatString_u() :
 * Replacement for snprintf(), which is not supported by C89.
 * sprintf() would be the supported one, but it's labelled unsafe,
 * so some modern static analyzers flag it, making it unusable.
 * Only covers the specific case where all conversions are %u.
 *
 * Copies `formatString` into `buffer`, expanding every "%u" into the decimal
 * form of `value` (the same value each time) and every "%%" into '%'.
 * `value` must be <= 100 (asserted); without assert, larger values print "100".
 *
 * @return : nb of characters written, not counting the terminating '\0'.
 *           Plain text that does not fit is silently truncated.
 *           -1 if `buffer` or `formatString` is NULL, if `buffer_size` is 0,
 *           if a number does not fit, or if the format contains an unsupported
 *           conversion. On -1, `buffer` holds an empty string whenever it
 *           has room for one. */
static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value)
{
    size_t written = 0;
    size_t i;
    assert(value <= 100);

    /* nothing can be stored, not even the terminator :
     * also protects the `buffer_size - 1` computations below from wrapping */
    if (buffer == NULL || buffer_size == 0) {
        return -1;
    }
    if (formatString == NULL) {
        buffer[0] = '\0';
        return -1;
    }

    /* invariant : written < buffer_size, so there is always room for '\0' */
    for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; ++i) {
        if (formatString[i] != '%') {
            buffer[written++] = formatString[i];
            continue;
        }

        ++i; /* look at the conversion character following '%' */
        if (formatString[i] == 'u') {
            size_t const nbDigits = (value < 10) ? 1 : ((value < 100) ? 2 : 3);
            /* need nbDigits + 1 bytes (digits + terminator).
             * Expressed as a subtraction that cannot wrap,
             * since written < buffer_size here. */
            if (buffer_size - written <= nbDigits) {
                buffer[0] = '\0';
                return -1; /* number does not fit */
            }
            if (nbDigits == 3) { /* 100 */
                buffer[written++] = '1';
                buffer[written++] = '0';
                buffer[written++] = '0';
            } else {
                if (nbDigits == 2) {
                    buffer[written++] = (char)('0' + (value / 10));
                }
                buffer[written++] = (char)('0' + (value % 10));
            }
        } else if (formatString[i] == '%') { /* escaped percent sign */
            buffer[written++] = '%';
        } else {
            /* unsupported conversion, or lone '%' ending the format */
            buffer[0] = '\0';
            return -1;
        }
    }

    buffer[written] = '\0';
    return (int)written;
}
192*01826a49SYabin Cui 
193*01826a49SYabin Cui /* *************************************
194*01826a49SYabin Cui  *  Benchmark Parameters
195*01826a49SYabin Cui  ***************************************/
196*01826a49SYabin Cui 
BMK_initAdvancedParams(void)197*01826a49SYabin Cui BMK_advancedParams_t BMK_initAdvancedParams(void)
198*01826a49SYabin Cui {
199*01826a49SYabin Cui     BMK_advancedParams_t const res = {
200*01826a49SYabin Cui         BMK_both,               /* mode */
201*01826a49SYabin Cui         BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
202*01826a49SYabin Cui         0,                      /* blockSize */
203*01826a49SYabin Cui         0,               /* targetCBlockSize */
204*01826a49SYabin Cui         0,                      /* nbWorkers */
205*01826a49SYabin Cui         0,                      /* realTime */
206*01826a49SYabin Cui         0,                      /* additionalParam */
207*01826a49SYabin Cui         0,                      /* ldmFlag */
208*01826a49SYabin Cui         0,                      /* ldmMinMatch */
209*01826a49SYabin Cui         0,                      /* ldmHashLog */
210*01826a49SYabin Cui         0,                      /* ldmBuckSizeLog */
211*01826a49SYabin Cui         0,                      /* ldmHashRateLog */
212*01826a49SYabin Cui         ZSTD_ps_auto,           /* literalCompressionMode */
213*01826a49SYabin Cui         0                       /* useRowMatchFinder */
214*01826a49SYabin Cui     };
215*01826a49SYabin Cui     return res;
216*01826a49SYabin Cui }
217*01826a49SYabin Cui 
218*01826a49SYabin Cui /* ********************************************************
219*01826a49SYabin Cui  *  Bench functions
220*01826a49SYabin Cui  **********************************************************/
/* blockParam_t :
 * groups, for one block, three (pointer, size) pairs plus one extra size.
 * NOTE(review): judging by the names only, these are presumably the source,
 * the compressed output (capacity `cRoom`, produced size `cSize`) and the
 * regenerated result; no user of this type is visible here, to be confirmed. */
typedef struct {
    const void* srcPtr;
    size_t srcSize;
    void* cPtr;
    size_t cRoom, cSize;
    void* resPtr;
    size_t resSize;
} blockParam_t;
230*01826a49SYabin Cui 
/* Local MIN / MAX, replacing any definition inherited from an included header.
 * warning : function-like macros, each argument may be evaluated twice. */
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#undef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
235*01826a49SYabin Cui 
/* BMK_initCCtx() :
 * Resets `ctx` (session and parameters), then applies, in this order :
 * the worker count, the compression level, the row match finder and
 * long-distance-matching settings from `adv`, the individual fields of
 * `comprParams`, the literal compression mode, the strategy,
 * the target compressed block size, and finally loads the dictionary.
 * Every zstd call goes through CHECK_Z(), so any failure is reported
 * and terminates the process with exit(1).
 * note : the order of the calls is kept as is; it may matter to zstd. */
static void BMK_initCCtx(
        ZSTD_CCtx* ctx,
        const void* dictBuffer,
        size_t dictBufferSize,
        int cLevel,
        const ZSTD_compressionParameters* comprParams,
        const BMK_advancedParams_t* adv)
{
    /* the reset result is checked, like BMK_initDCtx() does for the DCtx */
    CHECK_Z(ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters));
    /* a request for exactly 1 worker is passed to zstd as 0 workers.
     * NOTE(review): presumably to run single-threaded instead of spawning
     * one worker thread; confirm against the zstd manual. */
    if (adv->nbWorkers == 1) {
        CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
    } else {
        CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
    }
    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
    /* long distance matching */
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
    /* explicit compression parameters, applied after the compression level */
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx,
            ZSTD_c_literalCompressionMode,
            (int)adv->literalCompressionMode));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_strategy, (int)comprParams->strategy));
    CHECK_Z(ZSTD_CCtx_setParameter(
            ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize));
    CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
}
283*01826a49SYabin Cui 
284*01826a49SYabin Cui static void
BMK_initDCtx(ZSTD_DCtx * dctx,const void * dictBuffer,size_t dictBufferSize)285*01826a49SYabin Cui BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize)
286*01826a49SYabin Cui {
287*01826a49SYabin Cui     CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
288*01826a49SYabin Cui     CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
289*01826a49SYabin Cui }
290*01826a49SYabin Cui 
291*01826a49SYabin Cui typedef struct {
292*01826a49SYabin Cui     ZSTD_CCtx* cctx;
293*01826a49SYabin Cui     const void* dictBuffer;
294*01826a49SYabin Cui     size_t dictBufferSize;
295*01826a49SYabin Cui     int cLevel;
296*01826a49SYabin Cui     const ZSTD_compressionParameters* comprParams;
297*01826a49SYabin Cui     const BMK_advancedParams_t* adv;
298*01826a49SYabin Cui } BMK_initCCtxArgs;
299*01826a49SYabin Cui 
local_initCCtx(void * payload)300*01826a49SYabin Cui static size_t local_initCCtx(void* payload)
301*01826a49SYabin Cui {
302*01826a49SYabin Cui     BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
303*01826a49SYabin Cui     BMK_initCCtx(
304*01826a49SYabin Cui             ag->cctx,
305*01826a49SYabin Cui             ag->dictBuffer,
306*01826a49SYabin Cui             ag->dictBufferSize,
307*01826a49SYabin Cui             ag->cLevel,
308*01826a49SYabin Cui             ag->comprParams,
309*01826a49SYabin Cui             ag->adv);
310*01826a49SYabin Cui     return 0;
311*01826a49SYabin Cui }
312*01826a49SYabin Cui 
313*01826a49SYabin Cui typedef struct {
314*01826a49SYabin Cui     ZSTD_DCtx* dctx;
315*01826a49SYabin Cui     const void* dictBuffer;
316*01826a49SYabin Cui     size_t dictBufferSize;
317*01826a49SYabin Cui } BMK_initDCtxArgs;
318*01826a49SYabin Cui 
local_initDCtx(void * payload)319*01826a49SYabin Cui static size_t local_initDCtx(void* payload)
320*01826a49SYabin Cui {
321*01826a49SYabin Cui     BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
322*01826a49SYabin Cui     BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
323*01826a49SYabin Cui     return 0;
324*01826a49SYabin Cui }
325*01826a49SYabin Cui 
326*01826a49SYabin Cui /* `addArgs` is the context */
local_defaultCompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstSize,void * addArgs)327*01826a49SYabin Cui static size_t local_defaultCompress(
328*01826a49SYabin Cui         const void* srcBuffer,
329*01826a49SYabin Cui         size_t srcSize,
330*01826a49SYabin Cui         void* dstBuffer,
331*01826a49SYabin Cui         size_t dstSize,
332*01826a49SYabin Cui         void* addArgs)
333*01826a49SYabin Cui {
334*01826a49SYabin Cui     ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
335*01826a49SYabin Cui     return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
336*01826a49SYabin Cui }
337*01826a49SYabin Cui 
338*01826a49SYabin Cui /* `addArgs` is the context */
local_defaultDecompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstCapacity,void * addArgs)339*01826a49SYabin Cui static size_t local_defaultDecompress(
340*01826a49SYabin Cui         const void* srcBuffer,
341*01826a49SYabin Cui         size_t srcSize,
342*01826a49SYabin Cui         void* dstBuffer,
343*01826a49SYabin Cui         size_t dstCapacity,
344*01826a49SYabin Cui         void* addArgs)
345*01826a49SYabin Cui {
346*01826a49SYabin Cui     size_t moreToFlush    = 1;
347*01826a49SYabin Cui     ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
348*01826a49SYabin Cui     ZSTD_inBuffer in;
349*01826a49SYabin Cui     ZSTD_outBuffer out;
350*01826a49SYabin Cui     in.src   = srcBuffer;
351*01826a49SYabin Cui     in.size  = srcSize;
352*01826a49SYabin Cui     in.pos   = 0;
353*01826a49SYabin Cui     out.dst  = dstBuffer;
354*01826a49SYabin Cui     out.size = dstCapacity;
355*01826a49SYabin Cui     out.pos  = 0;
356*01826a49SYabin Cui     while (moreToFlush) {
357*01826a49SYabin Cui         if (out.pos == out.size) {
358*01826a49SYabin Cui             return (size_t)-ZSTD_error_dstSize_tooSmall;
359*01826a49SYabin Cui         }
360*01826a49SYabin Cui         moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
361*01826a49SYabin Cui         if (ZSTD_isError(moreToFlush)) {
362*01826a49SYabin Cui             return moreToFlush;
363*01826a49SYabin Cui         }
364*01826a49SYabin Cui     }
365*01826a49SYabin Cui     return out.pos;
366*01826a49SYabin Cui }
367*01826a49SYabin Cui 
368*01826a49SYabin Cui /* ================================================================= */
369*01826a49SYabin Cui /*      Benchmark Zstandard, mem-to-mem scenarios                    */
370*01826a49SYabin Cui /* ================================================================= */
371*01826a49SYabin Cui 
/* BMK_isSuccessful_benchOutcome() :
 * @return : 1 if `outcome` carries tag 0 (no error), 0 otherwise */
int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
{
    int const success = (outcome.tag == 0);
    return success;
}
376*01826a49SYabin Cui 
BMK_extract_benchResult(BMK_benchOutcome_t outcome)377*01826a49SYabin Cui BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
378*01826a49SYabin Cui {
379*01826a49SYabin Cui     assert(outcome.tag == 0);
380*01826a49SYabin Cui     return outcome.internal_never_use_directly;
381*01826a49SYabin Cui }
382*01826a49SYabin Cui 
BMK_benchOutcome_error(void)383*01826a49SYabin Cui static BMK_benchOutcome_t BMK_benchOutcome_error(void)
384*01826a49SYabin Cui {
385*01826a49SYabin Cui     BMK_benchOutcome_t b;
386*01826a49SYabin Cui     memset(&b, 0, sizeof(b));
387*01826a49SYabin Cui     b.tag = 1;
388*01826a49SYabin Cui     return b;
389*01826a49SYabin Cui }
390*01826a49SYabin Cui 
BMK_benchOutcome_setValidResult(BMK_benchResult_t result)391*01826a49SYabin Cui static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(
392*01826a49SYabin Cui         BMK_benchResult_t result)
393*01826a49SYabin Cui {
394*01826a49SYabin Cui     BMK_benchOutcome_t b;
395*01826a49SYabin Cui     b.tag                         = 0;
396*01826a49SYabin Cui     b.internal_never_use_directly = result;
397*01826a49SYabin Cui     return b;
398*01826a49SYabin Cui }
399*01826a49SYabin Cui 
400*01826a49SYabin Cui /* benchMem with no allocation */
BMK_benchMemAdvancedNoAlloc(const void ** srcPtrs,size_t * srcSizes,void ** cPtrs,size_t * cCapacities,size_t * cSizes,void ** resPtrs,size_t * resSizes,void ** resultBufferPtr,void * compressedBuffer,size_t maxCompressedSize,BMK_timedFnState_t * timeStateCompress,BMK_timedFnState_t * timeStateDecompress,const void * srcBuffer,size_t srcSize,const size_t * fileSizes,unsigned nbFiles,const int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,ZSTD_CCtx * cctx,ZSTD_DCtx * dctx,int displayLevel,const char * displayName,const BMK_advancedParams_t * adv)401*01826a49SYabin Cui static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
402*01826a49SYabin Cui         const void** srcPtrs,
403*01826a49SYabin Cui         size_t* srcSizes,
404*01826a49SYabin Cui         void** cPtrs,
405*01826a49SYabin Cui         size_t* cCapacities,
406*01826a49SYabin Cui         size_t* cSizes,
407*01826a49SYabin Cui         void** resPtrs,
408*01826a49SYabin Cui         size_t* resSizes,
409*01826a49SYabin Cui         void** resultBufferPtr,
410*01826a49SYabin Cui         void* compressedBuffer,
411*01826a49SYabin Cui         size_t maxCompressedSize,
412*01826a49SYabin Cui         BMK_timedFnState_t* timeStateCompress,
413*01826a49SYabin Cui         BMK_timedFnState_t* timeStateDecompress,
414*01826a49SYabin Cui 
415*01826a49SYabin Cui         const void* srcBuffer,
416*01826a49SYabin Cui         size_t srcSize,
417*01826a49SYabin Cui         const size_t* fileSizes,
418*01826a49SYabin Cui         unsigned nbFiles,
419*01826a49SYabin Cui         const int cLevel,
420*01826a49SYabin Cui         const ZSTD_compressionParameters* comprParams,
421*01826a49SYabin Cui         const void* dictBuffer,
422*01826a49SYabin Cui         size_t dictBufferSize,
423*01826a49SYabin Cui         ZSTD_CCtx* cctx,
424*01826a49SYabin Cui         ZSTD_DCtx* dctx,
425*01826a49SYabin Cui         int displayLevel,
426*01826a49SYabin Cui         const char* displayName,
427*01826a49SYabin Cui         const BMK_advancedParams_t* adv)
428*01826a49SYabin Cui {
429*01826a49SYabin Cui     size_t const blockSize =
430*01826a49SYabin Cui             ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
431*01826a49SYabin Cui                      ? adv->blockSize
432*01826a49SYabin Cui                      : srcSize)
433*01826a49SYabin Cui             + (!srcSize); /* avoid div by 0 */
434*01826a49SYabin Cui     BMK_benchResult_t benchResult;
435*01826a49SYabin Cui     size_t const loadedCompressedSize = srcSize;
436*01826a49SYabin Cui     size_t cSize                      = 0;
437*01826a49SYabin Cui     double ratio                      = 0.;
438*01826a49SYabin Cui     U32 nbBlocks;
439*01826a49SYabin Cui 
440*01826a49SYabin Cui     assert(cctx != NULL);
441*01826a49SYabin Cui     assert(dctx != NULL);
442*01826a49SYabin Cui 
443*01826a49SYabin Cui     /* init */
444*01826a49SYabin Cui     memset(&benchResult, 0, sizeof(benchResult));
445*01826a49SYabin Cui     if (strlen(displayName) > 17)
446*01826a49SYabin Cui         displayName +=
447*01826a49SYabin Cui                 strlen(displayName) - 17; /* display last 17 characters */
448*01826a49SYabin Cui     if (adv->mode == BMK_decodeOnly) {
449*01826a49SYabin Cui         /* benchmark only decompression : source must be already compressed */
450*01826a49SYabin Cui         const char* srcPtr = (const char*)srcBuffer;
451*01826a49SYabin Cui         U64 totalDSize64   = 0;
452*01826a49SYabin Cui         U32 fileNb;
453*01826a49SYabin Cui         for (fileNb = 0; fileNb < nbFiles; fileNb++) {
454*01826a49SYabin Cui             U64 const fSize64 =
455*01826a49SYabin Cui                     ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
456*01826a49SYabin Cui             if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) {
457*01826a49SYabin Cui                 RETURN_ERROR(
458*01826a49SYabin Cui                         32,
459*01826a49SYabin Cui                         BMK_benchOutcome_t,
460*01826a49SYabin Cui                         "Decompressed size cannot be determined: cannot benchmark");
461*01826a49SYabin Cui             }
462*01826a49SYabin Cui             if (fSize64 == ZSTD_CONTENTSIZE_ERROR) {
463*01826a49SYabin Cui                 RETURN_ERROR(
464*01826a49SYabin Cui                         32,
465*01826a49SYabin Cui                         BMK_benchOutcome_t,
466*01826a49SYabin Cui                         "Error while trying to assess decompressed size: data may be invalid");
467*01826a49SYabin Cui             }
468*01826a49SYabin Cui             totalDSize64 += fSize64;
469*01826a49SYabin Cui             srcPtr += fileSizes[fileNb];
470*01826a49SYabin Cui         }
471*01826a49SYabin Cui         {
472*01826a49SYabin Cui             size_t const decodedSize = (size_t)totalDSize64;
473*01826a49SYabin Cui             assert((U64)decodedSize == totalDSize64); /* check overflow */
474*01826a49SYabin Cui             free(*resultBufferPtr);
475*01826a49SYabin Cui             if (totalDSize64 > decodedSize) { /* size_t overflow */
476*01826a49SYabin Cui                 RETURN_ERROR(
477*01826a49SYabin Cui                         32,
478*01826a49SYabin Cui                         BMK_benchOutcome_t,
479*01826a49SYabin Cui                         "decompressed size is too large for local system");
480*01826a49SYabin Cui             }
481*01826a49SYabin Cui             *resultBufferPtr = malloc(decodedSize);
482*01826a49SYabin Cui             if (!(*resultBufferPtr)) {
483*01826a49SYabin Cui                 RETURN_ERROR(
484*01826a49SYabin Cui                         33,
485*01826a49SYabin Cui                         BMK_benchOutcome_t,
486*01826a49SYabin Cui                         "allocation error: not enough memory");
487*01826a49SYabin Cui             }
488*01826a49SYabin Cui             cSize   = srcSize;
489*01826a49SYabin Cui             srcSize = decodedSize;
490*01826a49SYabin Cui             ratio   = (double)srcSize / (double)cSize;
491*01826a49SYabin Cui         }
492*01826a49SYabin Cui     }
493*01826a49SYabin Cui 
494*01826a49SYabin Cui     /* Init data blocks  */
495*01826a49SYabin Cui     {
496*01826a49SYabin Cui         const char* srcPtr = (const char*)srcBuffer;
497*01826a49SYabin Cui         char* cPtr         = (char*)compressedBuffer;
498*01826a49SYabin Cui         char* resPtr       = (char*)(*resultBufferPtr);
499*01826a49SYabin Cui         U32 fileNb;
500*01826a49SYabin Cui         for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) {
501*01826a49SYabin Cui             size_t remaining              = fileSizes[fileNb];
502*01826a49SYabin Cui             U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly)
503*01826a49SYabin Cui                     ? 1
504*01826a49SYabin Cui                     : (U32)((remaining + (blockSize - 1)) / blockSize);
505*01826a49SYabin Cui             U32 const blockEnd            = nbBlocks + nbBlocksforThisFile;
506*01826a49SYabin Cui             for (; nbBlocks < blockEnd; nbBlocks++) {
507*01826a49SYabin Cui                 size_t const thisBlockSize = MIN(remaining, blockSize);
508*01826a49SYabin Cui                 srcPtrs[nbBlocks]          = srcPtr;
509*01826a49SYabin Cui                 srcSizes[nbBlocks]         = thisBlockSize;
510*01826a49SYabin Cui                 cPtrs[nbBlocks]            = cPtr;
511*01826a49SYabin Cui                 cCapacities[nbBlocks]      = (adv->mode == BMK_decodeOnly)
512*01826a49SYabin Cui                              ? thisBlockSize
513*01826a49SYabin Cui                              : ZSTD_compressBound(thisBlockSize);
514*01826a49SYabin Cui                 resPtrs[nbBlocks]          = resPtr;
515*01826a49SYabin Cui                 resSizes[nbBlocks]         = (adv->mode == BMK_decodeOnly)
516*01826a49SYabin Cui                                 ? (size_t)ZSTD_findDecompressedSize(
517*01826a49SYabin Cui                                 srcPtr, thisBlockSize)
518*01826a49SYabin Cui                                 : thisBlockSize;
519*01826a49SYabin Cui                 srcPtr += thisBlockSize;
520*01826a49SYabin Cui                 cPtr += cCapacities[nbBlocks];
521*01826a49SYabin Cui                 resPtr += thisBlockSize;
522*01826a49SYabin Cui                 remaining -= thisBlockSize;
523*01826a49SYabin Cui                 if (adv->mode == BMK_decodeOnly) {
524*01826a49SYabin Cui                     cSizes[nbBlocks]  = thisBlockSize;
525*01826a49SYabin Cui                     benchResult.cSize = thisBlockSize;
526*01826a49SYabin Cui                 }
527*01826a49SYabin Cui             }
528*01826a49SYabin Cui         }
529*01826a49SYabin Cui     }
530*01826a49SYabin Cui 
531*01826a49SYabin Cui     /* warming up `compressedBuffer` */
532*01826a49SYabin Cui     if (adv->mode == BMK_decodeOnly) {
533*01826a49SYabin Cui         memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
534*01826a49SYabin Cui     } else {
535*01826a49SYabin Cui         RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
536*01826a49SYabin Cui     }
537*01826a49SYabin Cui 
538*01826a49SYabin Cui     if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
539*01826a49SYabin Cui         OUTPUTLEVEL(
540*01826a49SYabin Cui                 2,
541*01826a49SYabin Cui                 "Warning : time measurements may be incorrect in multithreading mode... \n")
542*01826a49SYabin Cui     }
543*01826a49SYabin Cui 
544*01826a49SYabin Cui     /* Bench */
545*01826a49SYabin Cui     {
546*01826a49SYabin Cui         U64 const crcOrig = (adv->mode == BMK_decodeOnly)
547*01826a49SYabin Cui                 ? 0
548*01826a49SYabin Cui                 : XXH64(srcBuffer, srcSize, 0);
549*01826a49SYabin Cui #define NB_MARKS 4
550*01826a49SYabin Cui         const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
551*01826a49SYabin Cui         U32 markNb                  = 0;
552*01826a49SYabin Cui         int compressionCompleted    = (adv->mode == BMK_decodeOnly);
553*01826a49SYabin Cui         int decompressionCompleted  = (adv->mode == BMK_compressOnly);
554*01826a49SYabin Cui         BMK_benchParams_t cbp, dbp;
555*01826a49SYabin Cui         BMK_initCCtxArgs cctxprep;
556*01826a49SYabin Cui         BMK_initDCtxArgs dctxprep;
557*01826a49SYabin Cui 
558*01826a49SYabin Cui         cbp.benchFn       = local_defaultCompress; /* ZSTD_compress2 */
559*01826a49SYabin Cui         cbp.benchPayload  = cctx;
560*01826a49SYabin Cui         cbp.initFn        = local_initCCtx; /* BMK_initCCtx */
561*01826a49SYabin Cui         cbp.initPayload   = &cctxprep;
562*01826a49SYabin Cui         cbp.errorFn       = ZSTD_isError;
563*01826a49SYabin Cui         cbp.blockCount    = nbBlocks;
564*01826a49SYabin Cui         cbp.srcBuffers    = srcPtrs;
565*01826a49SYabin Cui         cbp.srcSizes      = srcSizes;
566*01826a49SYabin Cui         cbp.dstBuffers    = cPtrs;
567*01826a49SYabin Cui         cbp.dstCapacities = cCapacities;
568*01826a49SYabin Cui         cbp.blockResults  = cSizes;
569*01826a49SYabin Cui 
570*01826a49SYabin Cui         cctxprep.cctx           = cctx;
571*01826a49SYabin Cui         cctxprep.dictBuffer     = dictBuffer;
572*01826a49SYabin Cui         cctxprep.dictBufferSize = dictBufferSize;
573*01826a49SYabin Cui         cctxprep.cLevel         = cLevel;
574*01826a49SYabin Cui         cctxprep.comprParams    = comprParams;
575*01826a49SYabin Cui         cctxprep.adv            = adv;
576*01826a49SYabin Cui 
577*01826a49SYabin Cui         dbp.benchFn       = local_defaultDecompress;
578*01826a49SYabin Cui         dbp.benchPayload  = dctx;
579*01826a49SYabin Cui         dbp.initFn        = local_initDCtx;
580*01826a49SYabin Cui         dbp.initPayload   = &dctxprep;
581*01826a49SYabin Cui         dbp.errorFn       = ZSTD_isError;
582*01826a49SYabin Cui         dbp.blockCount    = nbBlocks;
583*01826a49SYabin Cui         dbp.srcBuffers    = (const void* const*)cPtrs;
584*01826a49SYabin Cui         dbp.srcSizes      = cSizes;
585*01826a49SYabin Cui         dbp.dstBuffers    = resPtrs;
586*01826a49SYabin Cui         dbp.dstCapacities = resSizes;
587*01826a49SYabin Cui         dbp.blockResults  = NULL;
588*01826a49SYabin Cui 
589*01826a49SYabin Cui         dctxprep.dctx           = dctx;
590*01826a49SYabin Cui         dctxprep.dictBuffer     = dictBuffer;
591*01826a49SYabin Cui         dctxprep.dictBufferSize = dictBufferSize;
592*01826a49SYabin Cui 
593*01826a49SYabin Cui         OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
594*01826a49SYabin Cui         assert(srcSize < UINT_MAX);
595*01826a49SYabin Cui         OUTPUTLEVEL(
596*01826a49SYabin Cui                 2,
597*01826a49SYabin Cui                 "%2s-%-17.17s :%10u -> \r",
598*01826a49SYabin Cui                 marks[markNb],
599*01826a49SYabin Cui                 displayName,
600*01826a49SYabin Cui                 (unsigned)srcSize);
601*01826a49SYabin Cui 
602*01826a49SYabin Cui         while (!(compressionCompleted && decompressionCompleted)) {
603*01826a49SYabin Cui             if (!compressionCompleted) {
604*01826a49SYabin Cui                 BMK_runOutcome_t const cOutcome =
605*01826a49SYabin Cui                         BMK_benchTimedFn(timeStateCompress, cbp);
606*01826a49SYabin Cui 
607*01826a49SYabin Cui                 if (!BMK_isSuccessful_runOutcome(cOutcome)) {
608*01826a49SYabin Cui                     RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
609*01826a49SYabin Cui                 }
610*01826a49SYabin Cui 
611*01826a49SYabin Cui                 {
612*01826a49SYabin Cui                     BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
613*01826a49SYabin Cui                     cSize                       = cResult.sumOfReturn;
614*01826a49SYabin Cui                     ratio = (double)srcSize / (double)cSize;
615*01826a49SYabin Cui                     {
616*01826a49SYabin Cui                         BMK_benchResult_t newResult;
617*01826a49SYabin Cui                         newResult.cSpeed =
618*01826a49SYabin Cui                                 (U64)((double)srcSize * TIMELOOP_NANOSEC
619*01826a49SYabin Cui                                       / cResult.nanoSecPerRun);
620*01826a49SYabin Cui                         benchResult.cSize = cSize;
621*01826a49SYabin Cui                         if (newResult.cSpeed > benchResult.cSpeed)
622*01826a49SYabin Cui                             benchResult.cSpeed = newResult.cSpeed;
623*01826a49SYabin Cui                     }
624*01826a49SYabin Cui                 }
625*01826a49SYabin Cui 
626*01826a49SYabin Cui                 {
627*01826a49SYabin Cui                     int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
628*01826a49SYabin Cui                     assert(cSize < UINT_MAX);
629*01826a49SYabin Cui                     OUTPUTLEVEL(
630*01826a49SYabin Cui                             2,
631*01826a49SYabin Cui                             "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
632*01826a49SYabin Cui                             marks[markNb],
633*01826a49SYabin Cui                             displayName,
634*01826a49SYabin Cui                             (unsigned)srcSize,
635*01826a49SYabin Cui                             (unsigned)cSize,
636*01826a49SYabin Cui                             ratioAccuracy,
637*01826a49SYabin Cui                             ratio,
638*01826a49SYabin Cui                             benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
639*01826a49SYabin Cui                             (double)benchResult.cSpeed / MB_UNIT);
640*01826a49SYabin Cui                 }
641*01826a49SYabin Cui                 compressionCompleted =
642*01826a49SYabin Cui                         BMK_isCompleted_TimedFn(timeStateCompress);
643*01826a49SYabin Cui             }
644*01826a49SYabin Cui 
645*01826a49SYabin Cui             if (!decompressionCompleted) {
646*01826a49SYabin Cui                 BMK_runOutcome_t const dOutcome =
647*01826a49SYabin Cui                         BMK_benchTimedFn(timeStateDecompress, dbp);
648*01826a49SYabin Cui 
649*01826a49SYabin Cui                 if (!BMK_isSuccessful_runOutcome(dOutcome)) {
650*01826a49SYabin Cui                     RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
651*01826a49SYabin Cui                 }
652*01826a49SYabin Cui 
653*01826a49SYabin Cui                 {
654*01826a49SYabin Cui                     BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
655*01826a49SYabin Cui                     U64 const newDSpeed =
656*01826a49SYabin Cui                             (U64)((double)srcSize * TIMELOOP_NANOSEC
657*01826a49SYabin Cui                                   / dResult.nanoSecPerRun);
658*01826a49SYabin Cui                     if (newDSpeed > benchResult.dSpeed)
659*01826a49SYabin Cui                         benchResult.dSpeed = newDSpeed;
660*01826a49SYabin Cui                 }
661*01826a49SYabin Cui 
662*01826a49SYabin Cui                 {
663*01826a49SYabin Cui                     int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
664*01826a49SYabin Cui                     OUTPUTLEVEL(
665*01826a49SYabin Cui                             2,
666*01826a49SYabin Cui                             "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
667*01826a49SYabin Cui                             marks[markNb],
668*01826a49SYabin Cui                             displayName,
669*01826a49SYabin Cui                             (unsigned)srcSize,
670*01826a49SYabin Cui                             (unsigned)cSize,
671*01826a49SYabin Cui                             ratioAccuracy,
672*01826a49SYabin Cui                             ratio,
673*01826a49SYabin Cui                             benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
674*01826a49SYabin Cui                             (double)benchResult.cSpeed / MB_UNIT,
675*01826a49SYabin Cui                             (double)benchResult.dSpeed / MB_UNIT);
676*01826a49SYabin Cui                 }
677*01826a49SYabin Cui                 decompressionCompleted =
678*01826a49SYabin Cui                         BMK_isCompleted_TimedFn(timeStateDecompress);
679*01826a49SYabin Cui             }
680*01826a49SYabin Cui             markNb = (markNb + 1) % NB_MARKS;
681*01826a49SYabin Cui         } /* while (!(compressionCompleted && decompressionCompleted)) */
682*01826a49SYabin Cui 
683*01826a49SYabin Cui         /* CRC Checking */
684*01826a49SYabin Cui         {
685*01826a49SYabin Cui             const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
686*01826a49SYabin Cui             U64 const crcCheck       = XXH64(resultBuffer, srcSize, 0);
687*01826a49SYabin Cui             if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) {
688*01826a49SYabin Cui                 size_t u;
689*01826a49SYabin Cui                 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n",
690*01826a49SYabin Cui                         displayName,
691*01826a49SYabin Cui                         (unsigned)crcOrig,
692*01826a49SYabin Cui                         (unsigned)crcCheck);
693*01826a49SYabin Cui                 for (u = 0; u < srcSize; u++) {
694*01826a49SYabin Cui                     if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
695*01826a49SYabin Cui                         unsigned segNb, bNb, pos;
696*01826a49SYabin Cui                         size_t bacc = 0;
697*01826a49SYabin Cui                         DISPLAY("Decoding error at pos %u ", (unsigned)u);
698*01826a49SYabin Cui                         for (segNb = 0; segNb < nbBlocks; segNb++) {
699*01826a49SYabin Cui                             if (bacc + srcSizes[segNb] > u)
700*01826a49SYabin Cui                                 break;
701*01826a49SYabin Cui                             bacc += srcSizes[segNb];
702*01826a49SYabin Cui                         }
703*01826a49SYabin Cui                         pos = (U32)(u - bacc);
704*01826a49SYabin Cui                         bNb = pos / (128 KB);
705*01826a49SYabin Cui                         DISPLAY("(sample %u, block %u, pos %u) \n",
706*01826a49SYabin Cui                                 segNb,
707*01826a49SYabin Cui                                 bNb,
708*01826a49SYabin Cui                                 pos);
709*01826a49SYabin Cui                         {
710*01826a49SYabin Cui                             size_t const lowest = (u > 5) ? 5 : u;
711*01826a49SYabin Cui                             size_t n;
712*01826a49SYabin Cui                             DISPLAY("origin: ");
713*01826a49SYabin Cui                             for (n = lowest; n > 0; n--)
714*01826a49SYabin Cui                                 DISPLAY("%02X ",
715*01826a49SYabin Cui                                         ((const BYTE*)srcBuffer)[u - n]);
716*01826a49SYabin Cui                             DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
717*01826a49SYabin Cui                             for (n = 1; n < 3; n++)
718*01826a49SYabin Cui                                 DISPLAY("%02X ",
719*01826a49SYabin Cui                                         ((const BYTE*)srcBuffer)[u + n]);
720*01826a49SYabin Cui                             DISPLAY(" \n");
721*01826a49SYabin Cui                             DISPLAY("decode: ");
722*01826a49SYabin Cui                             for (n = lowest; n > 0; n--)
723*01826a49SYabin Cui                                 DISPLAY("%02X ", resultBuffer[u - n]);
724*01826a49SYabin Cui                             DISPLAY(" :%02X:  ", resultBuffer[u]);
725*01826a49SYabin Cui                             for (n = 1; n < 3; n++)
726*01826a49SYabin Cui                                 DISPLAY("%02X ", resultBuffer[u + n]);
727*01826a49SYabin Cui                             DISPLAY(" \n");
728*01826a49SYabin Cui                         }
729*01826a49SYabin Cui                         break;
730*01826a49SYabin Cui                     }
731*01826a49SYabin Cui                     if (u == srcSize - 1) { /* should never happen */
732*01826a49SYabin Cui                         DISPLAY("no difference detected\n");
733*01826a49SYabin Cui                     }
734*01826a49SYabin Cui                 } /* for (u=0; u<srcSize; u++) */
735*01826a49SYabin Cui             }     /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
736*01826a49SYabin Cui         }         /* CRC Checking */
737*01826a49SYabin Cui 
738*01826a49SYabin Cui         if (displayLevel
739*01826a49SYabin Cui             == 1) { /* hidden display mode -q, used by python speed benchmark */
740*01826a49SYabin Cui             double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
741*01826a49SYabin Cui             double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
742*01826a49SYabin Cui             if (adv->additionalParam) {
743*01826a49SYabin Cui                 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n",
744*01826a49SYabin Cui                        cLevel,
745*01826a49SYabin Cui                        (int)cSize,
746*01826a49SYabin Cui                        ratio,
747*01826a49SYabin Cui                        cSpeed,
748*01826a49SYabin Cui                        dSpeed,
749*01826a49SYabin Cui                        displayName,
750*01826a49SYabin Cui                        adv->additionalParam);
751*01826a49SYabin Cui             } else {
752*01826a49SYabin Cui                 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n",
753*01826a49SYabin Cui                        cLevel,
754*01826a49SYabin Cui                        (int)cSize,
755*01826a49SYabin Cui                        ratio,
756*01826a49SYabin Cui                        cSpeed,
757*01826a49SYabin Cui                        dSpeed,
758*01826a49SYabin Cui                        displayName);
759*01826a49SYabin Cui             }
760*01826a49SYabin Cui         }
761*01826a49SYabin Cui 
762*01826a49SYabin Cui         OUTPUTLEVEL(2, "%2i#\n", cLevel);
763*01826a49SYabin Cui     } /* Bench */
764*01826a49SYabin Cui 
765*01826a49SYabin Cui     benchResult.cMem =
766*01826a49SYabin Cui             (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
767*01826a49SYabin Cui     return BMK_benchOutcome_setValidResult(benchResult);
768*01826a49SYabin Cui }
769*01826a49SYabin Cui 
BMK_benchMemAdvanced(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstCapacity,const size_t * fileSizes,unsigned nbFiles,int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,int displayLevel,const char * displayName,const BMK_advancedParams_t * adv)770*01826a49SYabin Cui BMK_benchOutcome_t BMK_benchMemAdvanced(
771*01826a49SYabin Cui         const void* srcBuffer,
772*01826a49SYabin Cui         size_t srcSize,
773*01826a49SYabin Cui         void* dstBuffer,
774*01826a49SYabin Cui         size_t dstCapacity,
775*01826a49SYabin Cui         const size_t* fileSizes,
776*01826a49SYabin Cui         unsigned nbFiles,
777*01826a49SYabin Cui         int cLevel,
778*01826a49SYabin Cui         const ZSTD_compressionParameters* comprParams,
779*01826a49SYabin Cui         const void* dictBuffer,
780*01826a49SYabin Cui         size_t dictBufferSize,
781*01826a49SYabin Cui         int displayLevel,
782*01826a49SYabin Cui         const char* displayName,
783*01826a49SYabin Cui         const BMK_advancedParams_t* adv)
784*01826a49SYabin Cui 
785*01826a49SYabin Cui {
786*01826a49SYabin Cui     int const dstParamsError =
787*01826a49SYabin Cui             !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
788*01826a49SYabin Cui 
789*01826a49SYabin Cui     size_t const blockSize =
790*01826a49SYabin Cui             ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
791*01826a49SYabin Cui                      ? adv->blockSize
792*01826a49SYabin Cui                      : srcSize)
793*01826a49SYabin Cui             + (!srcSize) /* avoid div by 0 */;
794*01826a49SYabin Cui     U32 const maxNbBlocks =
795*01826a49SYabin Cui             (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles;
796*01826a49SYabin Cui 
797*01826a49SYabin Cui     /* these are the blockTable parameters, just split up */
798*01826a49SYabin Cui     const void** const srcPtrs =
799*01826a49SYabin Cui             (const void**)malloc(maxNbBlocks * sizeof(void*));
800*01826a49SYabin Cui     size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
801*01826a49SYabin Cui 
802*01826a49SYabin Cui     void** const cPtrs        = (void**)malloc(maxNbBlocks * sizeof(void*));
803*01826a49SYabin Cui     size_t* const cSizes      = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
804*01826a49SYabin Cui     size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
805*01826a49SYabin Cui 
806*01826a49SYabin Cui     void** const resPtrs   = (void**)malloc(maxNbBlocks * sizeof(void*));
807*01826a49SYabin Cui     size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
808*01826a49SYabin Cui 
809*01826a49SYabin Cui     BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(
810*01826a49SYabin Cui             adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
811*01826a49SYabin Cui     BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(
812*01826a49SYabin Cui             adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
813*01826a49SYabin Cui 
814*01826a49SYabin Cui     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
815*01826a49SYabin Cui     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
816*01826a49SYabin Cui 
817*01826a49SYabin Cui     const size_t maxCompressedSize = dstCapacity
818*01826a49SYabin Cui             ? dstCapacity
819*01826a49SYabin Cui             : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
820*01826a49SYabin Cui 
821*01826a49SYabin Cui     void* const internalDstBuffer =
822*01826a49SYabin Cui             dstBuffer ? NULL : malloc(maxCompressedSize);
823*01826a49SYabin Cui     void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
824*01826a49SYabin Cui 
825*01826a49SYabin Cui     BMK_benchOutcome_t outcome =
826*01826a49SYabin Cui             BMK_benchOutcome_error(); /* error by default */
827*01826a49SYabin Cui 
828*01826a49SYabin Cui     void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
829*01826a49SYabin Cui 
830*01826a49SYabin Cui     int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes
831*01826a49SYabin Cui             || !cCapacities || !resPtrs || !resSizes || !timeStateCompress
832*01826a49SYabin Cui             || !timeStateDecompress || !cctx || !dctx || !compressedBuffer
833*01826a49SYabin Cui             || !resultBuffer;
834*01826a49SYabin Cui 
835*01826a49SYabin Cui     if (!allocationincomplete && !dstParamsError) {
836*01826a49SYabin Cui         outcome = BMK_benchMemAdvancedNoAlloc(
837*01826a49SYabin Cui                 srcPtrs,
838*01826a49SYabin Cui                 srcSizes,
839*01826a49SYabin Cui                 cPtrs,
840*01826a49SYabin Cui                 cCapacities,
841*01826a49SYabin Cui                 cSizes,
842*01826a49SYabin Cui                 resPtrs,
843*01826a49SYabin Cui                 resSizes,
844*01826a49SYabin Cui                 &resultBuffer,
845*01826a49SYabin Cui                 compressedBuffer,
846*01826a49SYabin Cui                 maxCompressedSize,
847*01826a49SYabin Cui                 timeStateCompress,
848*01826a49SYabin Cui                 timeStateDecompress,
849*01826a49SYabin Cui                 srcBuffer,
850*01826a49SYabin Cui                 srcSize,
851*01826a49SYabin Cui                 fileSizes,
852*01826a49SYabin Cui                 nbFiles,
853*01826a49SYabin Cui                 cLevel,
854*01826a49SYabin Cui                 comprParams,
855*01826a49SYabin Cui                 dictBuffer,
856*01826a49SYabin Cui                 dictBufferSize,
857*01826a49SYabin Cui                 cctx,
858*01826a49SYabin Cui                 dctx,
859*01826a49SYabin Cui                 displayLevel,
860*01826a49SYabin Cui                 displayName,
861*01826a49SYabin Cui                 adv);
862*01826a49SYabin Cui     }
863*01826a49SYabin Cui 
864*01826a49SYabin Cui     /* clean up */
865*01826a49SYabin Cui     BMK_freeTimedFnState(timeStateCompress);
866*01826a49SYabin Cui     BMK_freeTimedFnState(timeStateDecompress);
867*01826a49SYabin Cui 
868*01826a49SYabin Cui     ZSTD_freeCCtx(cctx);
869*01826a49SYabin Cui     ZSTD_freeDCtx(dctx);
870*01826a49SYabin Cui 
871*01826a49SYabin Cui     free(internalDstBuffer);
872*01826a49SYabin Cui     free(resultBuffer);
873*01826a49SYabin Cui 
874*01826a49SYabin Cui     free((void*)srcPtrs);
875*01826a49SYabin Cui     free(srcSizes);
876*01826a49SYabin Cui     free(cPtrs);
877*01826a49SYabin Cui     free(cSizes);
878*01826a49SYabin Cui     free(cCapacities);
879*01826a49SYabin Cui     free(resPtrs);
880*01826a49SYabin Cui     free(resSizes);
881*01826a49SYabin Cui 
882*01826a49SYabin Cui     if (allocationincomplete) {
883*01826a49SYabin Cui         RETURN_ERROR(
884*01826a49SYabin Cui                 31, BMK_benchOutcome_t, "allocation error : not enough memory");
885*01826a49SYabin Cui     }
886*01826a49SYabin Cui 
887*01826a49SYabin Cui     if (dstParamsError) {
888*01826a49SYabin Cui         RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
889*01826a49SYabin Cui     }
890*01826a49SYabin Cui     return outcome;
891*01826a49SYabin Cui }
892*01826a49SYabin Cui 
BMK_benchMem(const void * srcBuffer,size_t srcSize,const size_t * fileSizes,unsigned nbFiles,int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,int displayLevel,const char * displayName)893*01826a49SYabin Cui BMK_benchOutcome_t BMK_benchMem(
894*01826a49SYabin Cui         const void* srcBuffer,
895*01826a49SYabin Cui         size_t srcSize,
896*01826a49SYabin Cui         const size_t* fileSizes,
897*01826a49SYabin Cui         unsigned nbFiles,
898*01826a49SYabin Cui         int cLevel,
899*01826a49SYabin Cui         const ZSTD_compressionParameters* comprParams,
900*01826a49SYabin Cui         const void* dictBuffer,
901*01826a49SYabin Cui         size_t dictBufferSize,
902*01826a49SYabin Cui         int displayLevel,
903*01826a49SYabin Cui         const char* displayName)
904*01826a49SYabin Cui {
905*01826a49SYabin Cui     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
906*01826a49SYabin Cui     return BMK_benchMemAdvanced(
907*01826a49SYabin Cui             srcBuffer,
908*01826a49SYabin Cui             srcSize,
909*01826a49SYabin Cui             NULL,
910*01826a49SYabin Cui             0,
911*01826a49SYabin Cui             fileSizes,
912*01826a49SYabin Cui             nbFiles,
913*01826a49SYabin Cui             cLevel,
914*01826a49SYabin Cui             comprParams,
915*01826a49SYabin Cui             dictBuffer,
916*01826a49SYabin Cui             dictBufferSize,
917*01826a49SYabin Cui             displayLevel,
918*01826a49SYabin Cui             displayName,
919*01826a49SYabin Cui             &adv);
920*01826a49SYabin Cui }
921*01826a49SYabin Cui 
BMK_benchCLevel(const void * srcBuffer,size_t benchedSize,const size_t * fileSizes,unsigned nbFiles,int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,int displayLevel,const char * displayName,BMK_advancedParams_t const * const adv)922*01826a49SYabin Cui static BMK_benchOutcome_t BMK_benchCLevel(
923*01826a49SYabin Cui         const void* srcBuffer,
924*01826a49SYabin Cui         size_t benchedSize,
925*01826a49SYabin Cui         const size_t* fileSizes,
926*01826a49SYabin Cui         unsigned nbFiles,
927*01826a49SYabin Cui         int cLevel,
928*01826a49SYabin Cui         const ZSTD_compressionParameters* comprParams,
929*01826a49SYabin Cui         const void* dictBuffer,
930*01826a49SYabin Cui         size_t dictBufferSize,
931*01826a49SYabin Cui         int displayLevel,
932*01826a49SYabin Cui         const char* displayName,
933*01826a49SYabin Cui         BMK_advancedParams_t const* const adv)
934*01826a49SYabin Cui {
935*01826a49SYabin Cui     const char* pch = strrchr(displayName, '\\'); /* Windows */
936*01826a49SYabin Cui     if (!pch)
937*01826a49SYabin Cui         pch = strrchr(displayName, '/'); /* Linux */
938*01826a49SYabin Cui     if (pch)
939*01826a49SYabin Cui         displayName = pch + 1;
940*01826a49SYabin Cui 
941*01826a49SYabin Cui     if (adv->realTime) {
942*01826a49SYabin Cui         DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
943*01826a49SYabin Cui         SET_REALTIME_PRIORITY;
944*01826a49SYabin Cui     }
945*01826a49SYabin Cui 
946*01826a49SYabin Cui     if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
947*01826a49SYabin Cui         OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
948*01826a49SYabin Cui                ZSTD_VERSION_STRING,
949*01826a49SYabin Cui                ZSTD_GIT_COMMIT_STRING,
950*01826a49SYabin Cui                (unsigned)benchedSize,
951*01826a49SYabin Cui                adv->nbSeconds,
952*01826a49SYabin Cui                (unsigned)(adv->blockSize >> 10));
953*01826a49SYabin Cui 
954*01826a49SYabin Cui     return BMK_benchMemAdvanced(
955*01826a49SYabin Cui             srcBuffer,
956*01826a49SYabin Cui             benchedSize,
957*01826a49SYabin Cui             NULL,
958*01826a49SYabin Cui             0,
959*01826a49SYabin Cui             fileSizes,
960*01826a49SYabin Cui             nbFiles,
961*01826a49SYabin Cui             cLevel,
962*01826a49SYabin Cui             comprParams,
963*01826a49SYabin Cui             dictBuffer,
964*01826a49SYabin Cui             dictBufferSize,
965*01826a49SYabin Cui             displayLevel,
966*01826a49SYabin Cui             displayName,
967*01826a49SYabin Cui             adv);
968*01826a49SYabin Cui }
969*01826a49SYabin Cui 
BMK_syntheticTest(int cLevel,double compressibility,const ZSTD_compressionParameters * compressionParams,int displayLevel,const BMK_advancedParams_t * adv)970*01826a49SYabin Cui int BMK_syntheticTest(
971*01826a49SYabin Cui         int cLevel,
972*01826a49SYabin Cui         double compressibility,
973*01826a49SYabin Cui         const ZSTD_compressionParameters* compressionParams,
974*01826a49SYabin Cui         int displayLevel,
975*01826a49SYabin Cui         const BMK_advancedParams_t* adv)
976*01826a49SYabin Cui {
977*01826a49SYabin Cui     char nameBuff[20]        = { 0 };
978*01826a49SYabin Cui     const char* name         = nameBuff;
979*01826a49SYabin Cui     size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000;
980*01826a49SYabin Cui     void* srcBuffer;
981*01826a49SYabin Cui     BMK_benchOutcome_t res;
982*01826a49SYabin Cui 
983*01826a49SYabin Cui     if (cLevel > ZSTD_maxCLevel()) {
984*01826a49SYabin Cui         DISPLAYLEVEL(1, "Invalid Compression Level");
985*01826a49SYabin Cui         return 15;
986*01826a49SYabin Cui     }
987*01826a49SYabin Cui 
988*01826a49SYabin Cui     /* Memory allocation */
989*01826a49SYabin Cui     srcBuffer = malloc(benchedSize);
990*01826a49SYabin Cui     if (!srcBuffer) {
991*01826a49SYabin Cui         DISPLAYLEVEL(1, "allocation error : not enough memory");
992*01826a49SYabin Cui         return 16;
993*01826a49SYabin Cui     }
994*01826a49SYabin Cui 
995*01826a49SYabin Cui     /* Fill input buffer */
996*01826a49SYabin Cui     if (compressibility < 0.0) {
997*01826a49SYabin Cui         LOREM_genBuffer(srcBuffer, benchedSize, 0);
998*01826a49SYabin Cui         name = "Lorem ipsum";
999*01826a49SYabin Cui     } else {
1000*01826a49SYabin Cui         RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
1001*01826a49SYabin Cui         formatString_u(
1002*01826a49SYabin Cui                 nameBuff,
1003*01826a49SYabin Cui                 sizeof(nameBuff),
1004*01826a49SYabin Cui                 "Synthetic %u%%",
1005*01826a49SYabin Cui                 (unsigned)(compressibility * 100));
1006*01826a49SYabin Cui     }
1007*01826a49SYabin Cui 
1008*01826a49SYabin Cui     /* Bench */
1009*01826a49SYabin Cui     res = BMK_benchCLevel(
1010*01826a49SYabin Cui             srcBuffer,
1011*01826a49SYabin Cui             benchedSize,
1012*01826a49SYabin Cui             &benchedSize /* ? */,
1013*01826a49SYabin Cui             1 /* ? */,
1014*01826a49SYabin Cui             cLevel,
1015*01826a49SYabin Cui             compressionParams,
1016*01826a49SYabin Cui             NULL,
1017*01826a49SYabin Cui             0, /* dictionary */
1018*01826a49SYabin Cui             displayLevel,
1019*01826a49SYabin Cui             name,
1020*01826a49SYabin Cui             adv);
1021*01826a49SYabin Cui 
1022*01826a49SYabin Cui     /* clean up */
1023*01826a49SYabin Cui     free(srcBuffer);
1024*01826a49SYabin Cui 
1025*01826a49SYabin Cui     return !BMK_isSuccessful_benchOutcome(res);
1026*01826a49SYabin Cui }
1027*01826a49SYabin Cui 
/* BMK_findMaxMem() :
 * Probes, by trial allocation, for an amount of memory that malloc() can
 * actually provide. The request is rounded up to the next multiple of 64 MB,
 * increased by one more 64 MB step, and capped at `maxMemory`. Attempts then
 * shrink by 64 MB each time, until one succeeds or the amount reaches 0.
 * @return : the amount remaining after the final 64 MB decrement,
 *           i.e. the last size attempted, minus 64 MB.
 */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    BYTE* probe;

    /* 1 << 26 == 64 MB */
    requiredMem = ((requiredMem >> 26) + 1) << 26;
    requiredMem += step;
    if (requiredMem > maxMemory) {
        requiredMem = maxMemory;
    }

    for (;;) {
        probe = (BYTE*)malloc((size_t)requiredMem);
        requiredMem -= step; /* applied even after a successful attempt */
        if (probe != NULL || requiredMem == 0) {
            break;
        }
    }

    /* the probe only served as a test : release it immediately */
    free(probe);
    return (size_t)requiredMem;
}
1046*01826a49SYabin Cui 
1047*01826a49SYabin Cui /*! BMK_loadFiles() :
1048*01826a49SYabin Cui  *  Loads `buffer` with content of files listed within `fileNamesTable`.
1049*01826a49SYabin Cui  *  At most, fills `buffer` entirely. */
BMK_loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles,int displayLevel)1050*01826a49SYabin Cui static int BMK_loadFiles(
1051*01826a49SYabin Cui         void* buffer,
1052*01826a49SYabin Cui         size_t bufferSize,
1053*01826a49SYabin Cui         size_t* fileSizes,
1054*01826a49SYabin Cui         const char* const* fileNamesTable,
1055*01826a49SYabin Cui         unsigned nbFiles,
1056*01826a49SYabin Cui         int displayLevel)
1057*01826a49SYabin Cui {
1058*01826a49SYabin Cui     size_t pos = 0, totalSize = 0;
1059*01826a49SYabin Cui     unsigned n;
1060*01826a49SYabin Cui     for (n = 0; n < nbFiles; n++) {
1061*01826a49SYabin Cui         U64 fileSize = UTIL_getFileSize(
1062*01826a49SYabin Cui                 fileNamesTable[n]); /* last file may be shortened */
1063*01826a49SYabin Cui         if (UTIL_isDirectory(fileNamesTable[n])) {
1064*01826a49SYabin Cui             DISPLAYLEVEL(
1065*01826a49SYabin Cui                     2, "Ignoring %s directory...       \n", fileNamesTable[n]);
1066*01826a49SYabin Cui             fileSizes[n] = 0;
1067*01826a49SYabin Cui             continue;
1068*01826a49SYabin Cui         }
1069*01826a49SYabin Cui         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1070*01826a49SYabin Cui             DISPLAYLEVEL(
1071*01826a49SYabin Cui                     2,
1072*01826a49SYabin Cui                     "Cannot evaluate size of %s, ignoring ... \n",
1073*01826a49SYabin Cui                     fileNamesTable[n]);
1074*01826a49SYabin Cui             fileSizes[n] = 0;
1075*01826a49SYabin Cui             continue;
1076*01826a49SYabin Cui         }
1077*01826a49SYabin Cui         {
1078*01826a49SYabin Cui             FILE* const f = fopen(fileNamesTable[n], "rb");
1079*01826a49SYabin Cui             if (f == NULL)
1080*01826a49SYabin Cui                 RETURN_ERROR_INT(
1081*01826a49SYabin Cui                         10, "impossible to open file %s", fileNamesTable[n]);
1082*01826a49SYabin Cui             OUTPUTLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
1083*01826a49SYabin Cui             if (fileSize > bufferSize - pos)
1084*01826a49SYabin Cui                 fileSize = bufferSize - pos,
1085*01826a49SYabin Cui                 nbFiles  = n; /* buffer too small - stop after this file */
1086*01826a49SYabin Cui             {
1087*01826a49SYabin Cui                 size_t const readSize =
1088*01826a49SYabin Cui                         fread(((char*)buffer) + pos, 1, (size_t)fileSize, f);
1089*01826a49SYabin Cui                 if (readSize != (size_t)fileSize)
1090*01826a49SYabin Cui                     RETURN_ERROR_INT(
1091*01826a49SYabin Cui                             11, "could not read %s", fileNamesTable[n]);
1092*01826a49SYabin Cui                 pos += readSize;
1093*01826a49SYabin Cui             }
1094*01826a49SYabin Cui             fileSizes[n] = (size_t)fileSize;
1095*01826a49SYabin Cui             totalSize += (size_t)fileSize;
1096*01826a49SYabin Cui             fclose(f);
1097*01826a49SYabin Cui         }
1098*01826a49SYabin Cui     }
1099*01826a49SYabin Cui 
1100*01826a49SYabin Cui     if (totalSize == 0)
1101*01826a49SYabin Cui         RETURN_ERROR_INT(12, "no data to bench");
1102*01826a49SYabin Cui     return 0;
1103*01826a49SYabin Cui }
1104*01826a49SYabin Cui 
BMK_benchFilesAdvanced(const char * const * fileNamesTable,unsigned nbFiles,const char * dictFileName,int cLevel,const ZSTD_compressionParameters * compressionParams,int displayLevel,const BMK_advancedParams_t * adv)1105*01826a49SYabin Cui int BMK_benchFilesAdvanced(
1106*01826a49SYabin Cui         const char* const* fileNamesTable,
1107*01826a49SYabin Cui         unsigned nbFiles,
1108*01826a49SYabin Cui         const char* dictFileName,
1109*01826a49SYabin Cui         int cLevel,
1110*01826a49SYabin Cui         const ZSTD_compressionParameters* compressionParams,
1111*01826a49SYabin Cui         int displayLevel,
1112*01826a49SYabin Cui         const BMK_advancedParams_t* adv)
1113*01826a49SYabin Cui {
1114*01826a49SYabin Cui     void* srcBuffer = NULL;
1115*01826a49SYabin Cui     size_t benchedSize;
1116*01826a49SYabin Cui     void* dictBuffer      = NULL;
1117*01826a49SYabin Cui     size_t dictBufferSize = 0;
1118*01826a49SYabin Cui     size_t* fileSizes     = NULL;
1119*01826a49SYabin Cui     BMK_benchOutcome_t res;
1120*01826a49SYabin Cui     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
1121*01826a49SYabin Cui 
1122*01826a49SYabin Cui     if (!nbFiles) {
1123*01826a49SYabin Cui         DISPLAYLEVEL(1, "No Files to Benchmark");
1124*01826a49SYabin Cui         return 13;
1125*01826a49SYabin Cui     }
1126*01826a49SYabin Cui 
1127*01826a49SYabin Cui     if (cLevel > ZSTD_maxCLevel()) {
1128*01826a49SYabin Cui         DISPLAYLEVEL(1, "Invalid Compression Level");
1129*01826a49SYabin Cui         return 14;
1130*01826a49SYabin Cui     }
1131*01826a49SYabin Cui 
1132*01826a49SYabin Cui     if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
1133*01826a49SYabin Cui         DISPLAYLEVEL(1, "Error loading files");
1134*01826a49SYabin Cui         return 15;
1135*01826a49SYabin Cui     }
1136*01826a49SYabin Cui 
1137*01826a49SYabin Cui     fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
1138*01826a49SYabin Cui     if (!fileSizes) {
1139*01826a49SYabin Cui         DISPLAYLEVEL(1, "not enough memory for fileSizes");
1140*01826a49SYabin Cui         return 16;
1141*01826a49SYabin Cui     }
1142*01826a49SYabin Cui 
1143*01826a49SYabin Cui     /* Load dictionary */
1144*01826a49SYabin Cui     if (dictFileName != NULL) {
1145*01826a49SYabin Cui         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
1146*01826a49SYabin Cui         if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
1147*01826a49SYabin Cui             DISPLAYLEVEL(
1148*01826a49SYabin Cui                     1,
1149*01826a49SYabin Cui                     "error loading %s : %s \n",
1150*01826a49SYabin Cui                     dictFileName,
1151*01826a49SYabin Cui                     strerror(errno));
1152*01826a49SYabin Cui             free(fileSizes);
1153*01826a49SYabin Cui             DISPLAYLEVEL(1, "benchmark aborted");
1154*01826a49SYabin Cui             return 17;
1155*01826a49SYabin Cui         }
1156*01826a49SYabin Cui         if (dictFileSize > 64 MB) {
1157*01826a49SYabin Cui             free(fileSizes);
1158*01826a49SYabin Cui             DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
1159*01826a49SYabin Cui             return 18;
1160*01826a49SYabin Cui         }
1161*01826a49SYabin Cui         dictBufferSize = (size_t)dictFileSize;
1162*01826a49SYabin Cui         dictBuffer     = malloc(dictBufferSize);
1163*01826a49SYabin Cui         if (dictBuffer == NULL) {
1164*01826a49SYabin Cui             free(fileSizes);
1165*01826a49SYabin Cui             DISPLAYLEVEL(
1166*01826a49SYabin Cui                     1,
1167*01826a49SYabin Cui                     "not enough memory for dictionary (%u bytes)",
1168*01826a49SYabin Cui                     (unsigned)dictBufferSize);
1169*01826a49SYabin Cui             return 19;
1170*01826a49SYabin Cui         }
1171*01826a49SYabin Cui 
1172*01826a49SYabin Cui         {
1173*01826a49SYabin Cui             int const errorCode = BMK_loadFiles(
1174*01826a49SYabin Cui                     dictBuffer,
1175*01826a49SYabin Cui                     dictBufferSize,
1176*01826a49SYabin Cui                     fileSizes,
1177*01826a49SYabin Cui                     &dictFileName /*?*/,
1178*01826a49SYabin Cui                     1 /*?*/,
1179*01826a49SYabin Cui                     displayLevel);
1180*01826a49SYabin Cui             if (errorCode) {
1181*01826a49SYabin Cui                 res = BMK_benchOutcome_error();
1182*01826a49SYabin Cui                 goto _cleanUp;
1183*01826a49SYabin Cui             }
1184*01826a49SYabin Cui         }
1185*01826a49SYabin Cui     }
1186*01826a49SYabin Cui 
1187*01826a49SYabin Cui     /* Memory allocation & restrictions */
1188*01826a49SYabin Cui     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
1189*01826a49SYabin Cui     if ((U64)benchedSize > totalSizeToLoad)
1190*01826a49SYabin Cui         benchedSize = (size_t)totalSizeToLoad;
1191*01826a49SYabin Cui     if (benchedSize < totalSizeToLoad)
1192*01826a49SYabin Cui         DISPLAY("Not enough memory; testing %u MB only...\n",
1193*01826a49SYabin Cui                 (unsigned)(benchedSize >> 20));
1194*01826a49SYabin Cui 
1195*01826a49SYabin Cui     srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
1196*01826a49SYabin Cui     if (!srcBuffer) {
1197*01826a49SYabin Cui         free(dictBuffer);
1198*01826a49SYabin Cui         free(fileSizes);
1199*01826a49SYabin Cui         DISPLAYLEVEL(1, "not enough memory for srcBuffer");
1200*01826a49SYabin Cui         return 20;
1201*01826a49SYabin Cui     }
1202*01826a49SYabin Cui 
1203*01826a49SYabin Cui     /* Load input buffer */
1204*01826a49SYabin Cui     {
1205*01826a49SYabin Cui         int const errorCode = BMK_loadFiles(
1206*01826a49SYabin Cui                 srcBuffer,
1207*01826a49SYabin Cui                 benchedSize,
1208*01826a49SYabin Cui                 fileSizes,
1209*01826a49SYabin Cui                 fileNamesTable,
1210*01826a49SYabin Cui                 nbFiles,
1211*01826a49SYabin Cui                 displayLevel);
1212*01826a49SYabin Cui         if (errorCode) {
1213*01826a49SYabin Cui             res = BMK_benchOutcome_error();
1214*01826a49SYabin Cui             goto _cleanUp;
1215*01826a49SYabin Cui         }
1216*01826a49SYabin Cui     }
1217*01826a49SYabin Cui 
1218*01826a49SYabin Cui     /* Bench */
1219*01826a49SYabin Cui     {
1220*01826a49SYabin Cui         char mfName[20] = { 0 };
1221*01826a49SYabin Cui         formatString_u(mfName, sizeof(mfName), " %u files", nbFiles);
1222*01826a49SYabin Cui         {
1223*01826a49SYabin Cui             const char* const displayName =
1224*01826a49SYabin Cui                     (nbFiles > 1) ? mfName : fileNamesTable[0];
1225*01826a49SYabin Cui             res = BMK_benchCLevel(
1226*01826a49SYabin Cui                     srcBuffer,
1227*01826a49SYabin Cui                     benchedSize,
1228*01826a49SYabin Cui                     fileSizes,
1229*01826a49SYabin Cui                     nbFiles,
1230*01826a49SYabin Cui                     cLevel,
1231*01826a49SYabin Cui                     compressionParams,
1232*01826a49SYabin Cui                     dictBuffer,
1233*01826a49SYabin Cui                     dictBufferSize,
1234*01826a49SYabin Cui                     displayLevel,
1235*01826a49SYabin Cui                     displayName,
1236*01826a49SYabin Cui                     adv);
1237*01826a49SYabin Cui         }
1238*01826a49SYabin Cui     }
1239*01826a49SYabin Cui 
1240*01826a49SYabin Cui _cleanUp:
1241*01826a49SYabin Cui     free(srcBuffer);
1242*01826a49SYabin Cui     free(dictBuffer);
1243*01826a49SYabin Cui     free(fileSizes);
1244*01826a49SYabin Cui     return !BMK_isSuccessful_benchOutcome(res);
1245*01826a49SYabin Cui }
1246*01826a49SYabin Cui 
BMK_benchFiles(const char * const * fileNamesTable,unsigned nbFiles,const char * dictFileName,int cLevel,const ZSTD_compressionParameters * compressionParams,int displayLevel)1247*01826a49SYabin Cui int BMK_benchFiles(
1248*01826a49SYabin Cui         const char* const* fileNamesTable,
1249*01826a49SYabin Cui         unsigned nbFiles,
1250*01826a49SYabin Cui         const char* dictFileName,
1251*01826a49SYabin Cui         int cLevel,
1252*01826a49SYabin Cui         const ZSTD_compressionParameters* compressionParams,
1253*01826a49SYabin Cui         int displayLevel)
1254*01826a49SYabin Cui {
1255*01826a49SYabin Cui     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
1256*01826a49SYabin Cui     return BMK_benchFilesAdvanced(
1257*01826a49SYabin Cui             fileNamesTable,
1258*01826a49SYabin Cui             nbFiles,
1259*01826a49SYabin Cui             dictFileName,
1260*01826a49SYabin Cui             cLevel,
1261*01826a49SYabin Cui             compressionParams,
1262*01826a49SYabin Cui             displayLevel,
1263*01826a49SYabin Cui             &adv);
1264*01826a49SYabin Cui }
1265