xref: /aosp_15_r20/external/zstd/tests/fuzz/zstd_helpers.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui #define ZSTD_STATIC_LINKING_ONLY
12*01826a49SYabin Cui #define ZDICT_STATIC_LINKING_ONLY
13*01826a49SYabin Cui 
14*01826a49SYabin Cui #include <string.h>
15*01826a49SYabin Cui 
16*01826a49SYabin Cui #include "zstd_helpers.h"
17*01826a49SYabin Cui #include "fuzz_helpers.h"
18*01826a49SYabin Cui #include "zstd.h"
19*01826a49SYabin Cui #include "zdict.h"
20*01826a49SYabin Cui #include "sequence_producer.h"
21*01826a49SYabin Cui #include "fuzz_third_party_seq_prod.h"
22*01826a49SYabin Cui 
/* Compression-level bounds shared with the fuzzers.
 * NOTE(review): presumably the inclusive range of levels a fuzzer may pick;
 * no use is visible in this file — confirm against callers. */
23*01826a49SYabin Cui const int kMinClevel = -3;
24*01826a49SYabin Cui const int kMaxClevel = 19;
25*01826a49SYabin Cui 
/* Passed as the state argument to ZSTD_registerSequenceProducer() when
 * FUZZ_THIRD_PARTY_SEQ_PROD is defined; starts out NULL. */
26*01826a49SYabin Cui void* FUZZ_seqProdState = NULL;
27*01826a49SYabin Cui 
/* Apply a single parameter/value pair to `cctx`.
 * The status returned by ZSTD_CCtx_setParameter() is handed to FUZZ_ZASSERT. */
static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) {
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value));
}
32*01826a49SYabin Cui 
/* Draw one value from `producer` via FUZZ_dataProducer_uint32Range(),
 * forwarding `min` and `max` as the range arguments. */
static unsigned produceParamValue(unsigned min, unsigned max,
                                  FUZZ_dataProducer_t *producer)
{
    unsigned const drawn = FUZZ_dataProducer_uint32Range(producer, min, max);
    return drawn;
}
37*01826a49SYabin Cui 
/* Draw a value for the range (min, max) from `producer` and apply it to
 * `param` on `cctx`. The unsigned draw is converted to the int that set()
 * takes. */
static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min,
                    unsigned max, FUZZ_dataProducer_t *producer)
{
    set(cctx, param, (int)produceParamValue(min, max, producer));
}
43*01826a49SYabin Cui 
FUZZ_randomCParams(size_t srcSize,FUZZ_dataProducer_t * producer)44*01826a49SYabin Cui ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer)
45*01826a49SYabin Cui {
46*01826a49SYabin Cui     /* Select compression parameters */
47*01826a49SYabin Cui     ZSTD_compressionParameters cParams;
48*01826a49SYabin Cui     cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15);
49*01826a49SYabin Cui     cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15);
50*01826a49SYabin Cui     cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16);
51*01826a49SYabin Cui     cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9);
52*01826a49SYabin Cui     cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN,
53*01826a49SYabin Cui                                           ZSTD_MINMATCH_MAX);
54*01826a49SYabin Cui     cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512);
55*01826a49SYabin Cui     cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
56*01826a49SYabin Cui     return ZSTD_adjustCParams(cParams, srcSize, 0);
57*01826a49SYabin Cui }
58*01826a49SYabin Cui 
FUZZ_randomFParams(FUZZ_dataProducer_t * producer)59*01826a49SYabin Cui ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer)
60*01826a49SYabin Cui {
61*01826a49SYabin Cui     /* Select frame parameters */
62*01826a49SYabin Cui     ZSTD_frameParameters fParams;
63*01826a49SYabin Cui     fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
64*01826a49SYabin Cui     fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
65*01826a49SYabin Cui     fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
66*01826a49SYabin Cui     return fParams;
67*01826a49SYabin Cui }
68*01826a49SYabin Cui 
FUZZ_randomParams(size_t srcSize,FUZZ_dataProducer_t * producer)69*01826a49SYabin Cui ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
70*01826a49SYabin Cui {
71*01826a49SYabin Cui     ZSTD_parameters params;
72*01826a49SYabin Cui     params.cParams = FUZZ_randomCParams(srcSize, producer);
73*01826a49SYabin Cui     params.fParams = FUZZ_randomFParams(producer);
74*01826a49SYabin Cui     return params;
75*01826a49SYabin Cui }
76*01826a49SYabin Cui 
/* Register a sequence producer on `cctx` and set the parameters that go
 * with it.
 *
 * With FUZZ_THIRD_PARTY_SEQ_PROD defined, FUZZ_thirdPartySeqProd is registered
 * with FUZZ_seqProdState as its state and the fallback parameter is set to 1.
 * Otherwise simpleSequenceProducer is registered with a NULL state and the
 * fallback parameter is drawn from `producer`.
 *
 * In both configurations nbWorkers is then set to 0 and long distance
 * matching is set to ZSTD_ps_disable. */
static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer)
{
#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
    ZSTD_registerSequenceProducer(cctx, FUZZ_seqProdState, FUZZ_thirdPartySeqProd);
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1));
#else
    ZSTD_registerSequenceProducer(cctx, NULL, simpleSequenceProducer);
    setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer);
#endif

    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable));
}
100*01826a49SYabin Cui 
/*
 * Configure `cctx` with producer-driven values for its advanced parameters.
 *
 * cctx     : compression context to configure; every parameter is applied
 *            through set()/setRand(), which check the result with FUZZ_ZASSERT.
 * srcSize  : size of the input about to be compressed; used to adjust the
 *            compression parameters and to bound the source size hint.
 * producer : source of the drawn values.
 *
 * The statements below are order-sensitive: each draw pulls from `producer`,
 * so the sequence of calls determines which input bytes drive which
 * parameter. Do not reorder.
 */
void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer)
{
    ZSTD_compressionParameters const cParams = FUZZ_randomCParams(srcSize, producer);
    set(cctx, ZSTD_c_windowLog, cParams.windowLog);
    set(cctx, ZSTD_c_hashLog, cParams.hashLog);
    set(cctx, ZSTD_c_chainLog, cParams.chainLog);
    set(cctx, ZSTD_c_searchLog, cParams.searchLog);
    set(cctx, ZSTD_c_minMatch, cParams.minMatch);
    set(cctx, ZSTD_c_targetLength, cParams.targetLength);
    set(cctx, ZSTD_c_strategy, cParams.strategy);
    /* Select frame parameters */
    setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer);
    setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer);
    setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer);
    /* Select long distance matching parameters */
    setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer);
    setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer);
    setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN,
            ZSTD_LDM_MINMATCH_MAX, producer);
    setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX,
            producer);
    setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
            ZSTD_LDM_HASHRATELOG_MAX, producer);
    /* Set misc parameters */
#ifndef ZSTD_MULTITHREAD
    /* To reproduce with or without ZSTD_MULTITHREAD, draw the same two values
     * as the multithreaded build does and discard them, then force both
     * parameters to 0. */
    (void)produceParamValue(0, 2, producer);  /* stands in for nbWorkers */
    (void)produceParamValue(0, 1, producer);  /* stands in for rsyncable */
    set(cctx, ZSTD_c_nbWorkers, 0);
    set(cctx, ZSTD_c_rsyncable, 0);
#else
    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
    setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
#endif
    setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
    setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer);
    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
    setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
    setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
    setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
    setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
    setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
    setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer);
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
        /* The hint's upper bound is 2 * srcSize, clamped to
         * ZSTD_SRCSIZEHINT_MAX so that a very large srcSize can neither wrap
         * the multiplication nor be truncated when narrowed to unsigned. */
        size_t const hintCap = (size_t)ZSTD_SRCSIZEHINT_MAX;
        size_t const hintMax = (srcSize > hintCap / 2) ? hintCap : 2 * srcSize;
        setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, (unsigned)hintMax, producer);
    }
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
        setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer);
    }

#ifdef FUZZ_THIRD_PARTY_SEQ_PROD
    setSequenceProducerParams(cctx, producer);
#else
    /* Enable the sequence producer only when the draw over (0, 10) equals 1;
     * otherwise make sure none is registered. */
    if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) {
        setSequenceProducerParams(cctx, producer);
    } else {
        ZSTD_registerSequenceProducer(cctx, NULL, NULL);
    }
#endif
}
166*01826a49SYabin Cui 
FUZZ_train(void const * src,size_t srcSize,FUZZ_dataProducer_t * producer)167*01826a49SYabin Cui FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
168*01826a49SYabin Cui {
169*01826a49SYabin Cui     size_t const dictSize = MAX(srcSize / 8, 1024);
170*01826a49SYabin Cui     size_t const totalSampleSize = dictSize * 11;
171*01826a49SYabin Cui     FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize };
172*01826a49SYabin Cui     char* const samples = (char*)FUZZ_malloc(totalSampleSize);
173*01826a49SYabin Cui     unsigned nbSamples = 100;
174*01826a49SYabin Cui     size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples);
175*01826a49SYabin Cui     size_t pos = 0;
176*01826a49SYabin Cui     size_t sample = 0;
177*01826a49SYabin Cui     ZDICT_fastCover_params_t params;
178*01826a49SYabin Cui 
179*01826a49SYabin Cui     for (sample = 0; sample < nbSamples; ++sample) {
180*01826a49SYabin Cui       size_t const remaining = totalSampleSize - pos;
181*01826a49SYabin Cui       size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
182*01826a49SYabin Cui       size_t const limit = MIN(srcSize - offset, remaining);
183*01826a49SYabin Cui       size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
184*01826a49SYabin Cui       memcpy(samples + pos, (const char*)src + offset, toCopy);
185*01826a49SYabin Cui       pos += toCopy;
186*01826a49SYabin Cui       samplesSizes[sample] = toCopy;
187*01826a49SYabin Cui     }
188*01826a49SYabin Cui     memset(samples + pos, 0, totalSampleSize - pos);
189*01826a49SYabin Cui 
190*01826a49SYabin Cui     memset(&params, 0, sizeof(params));
191*01826a49SYabin Cui     params.accel = 5;
192*01826a49SYabin Cui     params.k = 40;
193*01826a49SYabin Cui     params.d = 8;
194*01826a49SYabin Cui     params.f = 14;
195*01826a49SYabin Cui     params.zParams.compressionLevel = 1;
196*01826a49SYabin Cui     dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
197*01826a49SYabin Cui         samples, samplesSizes, nbSamples, params);
198*01826a49SYabin Cui     if (ZSTD_isError(dict.size)) {
199*01826a49SYabin Cui         free(dict.buff);
200*01826a49SYabin Cui         memset(&dict, 0, sizeof(dict));
201*01826a49SYabin Cui     }
202*01826a49SYabin Cui 
203*01826a49SYabin Cui     free(samplesSizes);
204*01826a49SYabin Cui     free(samples);
205*01826a49SYabin Cui 
206*01826a49SYabin Cui     return dict;
207*01826a49SYabin Cui }
208