xref: /aosp_15_r20/external/zstd/tests/seqgen.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui #include "seqgen.h"
12*01826a49SYabin Cui #include "mem.h"
13*01826a49SYabin Cui #include <string.h>
14*01826a49SYabin Cui 
15*01826a49SYabin Cui #define MIN(a, b)  ((a) < (b) ? (a) : (b))
16*01826a49SYabin Cui 
17*01826a49SYabin Cui static const size_t kMatchBytes = 128;
18*01826a49SYabin Cui 
19*01826a49SYabin Cui #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
SEQ_randByte(unsigned * src)20*01826a49SYabin Cui static BYTE SEQ_randByte(unsigned* src)
21*01826a49SYabin Cui {
22*01826a49SYabin Cui     static const U32 prime1 = 2654435761U;
23*01826a49SYabin Cui     static const U32 prime2 = 2246822519U;
24*01826a49SYabin Cui     U32 rand32 = *src;
25*01826a49SYabin Cui     rand32 *= prime1;
26*01826a49SYabin Cui     rand32 ^= prime2;
27*01826a49SYabin Cui     rand32  = SEQ_rotl32(rand32, 13);
28*01826a49SYabin Cui     *src = rand32;
29*01826a49SYabin Cui     return (BYTE)(rand32 >> 5);
30*01826a49SYabin Cui }
31*01826a49SYabin Cui 
SEQ_initStream(unsigned seed)32*01826a49SYabin Cui SEQ_stream SEQ_initStream(unsigned seed)
33*01826a49SYabin Cui {
34*01826a49SYabin Cui     SEQ_stream stream;
35*01826a49SYabin Cui     stream.state = 0;
36*01826a49SYabin Cui     XXH64_reset(&stream.xxh, 0);
37*01826a49SYabin Cui     stream.seed = seed;
38*01826a49SYabin Cui     return stream;
39*01826a49SYabin Cui }
40*01826a49SYabin Cui 
41*01826a49SYabin Cui /* Generates a single guard byte, then match length + 1 of a different byte,
42*01826a49SYabin Cui  * then another guard byte.
43*01826a49SYabin Cui  */
SEQ_gen_matchLength(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)44*01826a49SYabin Cui static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
45*01826a49SYabin Cui                                   SEQ_outBuffer* out)
46*01826a49SYabin Cui {
47*01826a49SYabin Cui     typedef enum {
48*01826a49SYabin Cui         ml_first_byte = 0,
49*01826a49SYabin Cui         ml_match_bytes,
50*01826a49SYabin Cui         ml_last_byte,
51*01826a49SYabin Cui     } ml_state;
52*01826a49SYabin Cui     BYTE* const ostart = (BYTE*)out->dst;
53*01826a49SYabin Cui     BYTE* const oend = ostart + out->size;
54*01826a49SYabin Cui     BYTE* op = ostart + out->pos;
55*01826a49SYabin Cui 
56*01826a49SYabin Cui     switch ((ml_state)stream->state) {
57*01826a49SYabin Cui     case ml_first_byte:
58*01826a49SYabin Cui         /* Generate a single byte and pick a different byte for the match */
59*01826a49SYabin Cui         if (op >= oend) {
60*01826a49SYabin Cui             stream->bytesLeft = 1;
61*01826a49SYabin Cui             break;
62*01826a49SYabin Cui         }
63*01826a49SYabin Cui         *op = SEQ_randByte(&stream->seed) & 0xFF;
64*01826a49SYabin Cui         do {
65*01826a49SYabin Cui             stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
66*01826a49SYabin Cui         } while (*op == stream->saved);
67*01826a49SYabin Cui         ++op;
68*01826a49SYabin Cui         /* State transition */
69*01826a49SYabin Cui         stream->state = ml_match_bytes;
70*01826a49SYabin Cui         stream->bytesLeft = value + 1;
71*01826a49SYabin Cui     /* fall-through */
72*01826a49SYabin Cui     case ml_match_bytes: {
73*01826a49SYabin Cui         /* Copy matchLength + 1 bytes to the output buffer */
74*01826a49SYabin Cui         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
75*01826a49SYabin Cui         if (setLength > 0) {
76*01826a49SYabin Cui             memset(op, stream->saved, setLength);
77*01826a49SYabin Cui             op += setLength;
78*01826a49SYabin Cui             stream->bytesLeft -= setLength;
79*01826a49SYabin Cui         }
80*01826a49SYabin Cui         if (stream->bytesLeft > 0)
81*01826a49SYabin Cui             break;
82*01826a49SYabin Cui         /* State transition */
83*01826a49SYabin Cui         stream->state = ml_last_byte;
84*01826a49SYabin Cui     }
85*01826a49SYabin Cui     /* fall-through */
86*01826a49SYabin Cui     case ml_last_byte:
87*01826a49SYabin Cui         /* Generate a single byte and pick a different byte for the match */
88*01826a49SYabin Cui         if (op >= oend) {
89*01826a49SYabin Cui             stream->bytesLeft = 1;
90*01826a49SYabin Cui             break;
91*01826a49SYabin Cui         }
92*01826a49SYabin Cui         do {
93*01826a49SYabin Cui             *op = SEQ_randByte(&stream->seed) & 0xFF;
94*01826a49SYabin Cui         } while (*op == stream->saved);
95*01826a49SYabin Cui         ++op;
96*01826a49SYabin Cui         /* State transition */
97*01826a49SYabin Cui     /* fall-through */
98*01826a49SYabin Cui     default:
99*01826a49SYabin Cui         stream->state = 0;
100*01826a49SYabin Cui         stream->bytesLeft = 0;
101*01826a49SYabin Cui         break;
102*01826a49SYabin Cui     }
103*01826a49SYabin Cui     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
104*01826a49SYabin Cui     out->pos = op - ostart;
105*01826a49SYabin Cui     return stream->bytesLeft;
106*01826a49SYabin Cui }
107*01826a49SYabin Cui 
108*01826a49SYabin Cui /* Saves the current seed then generates kMatchBytes random bytes >= 128.
109*01826a49SYabin Cui  * Generates literal length - kMatchBytes random bytes < 128.
110*01826a49SYabin Cui  * Generates another kMatchBytes using the saved seed to generate a match.
111*01826a49SYabin Cui  * This way the match is easy to find for the compressors.
112*01826a49SYabin Cui  */
SEQ_gen_litLength(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)113*01826a49SYabin Cui static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
114*01826a49SYabin Cui {
115*01826a49SYabin Cui     typedef enum {
116*01826a49SYabin Cui         ll_start = 0,
117*01826a49SYabin Cui         ll_run_bytes,
118*01826a49SYabin Cui         ll_literals,
119*01826a49SYabin Cui         ll_run_match,
120*01826a49SYabin Cui     } ll_state;
121*01826a49SYabin Cui     BYTE* const ostart = (BYTE*)out->dst;
122*01826a49SYabin Cui     BYTE* const oend = ostart + out->size;
123*01826a49SYabin Cui     BYTE* op = ostart + out->pos;
124*01826a49SYabin Cui 
125*01826a49SYabin Cui     switch ((ll_state)stream->state) {
126*01826a49SYabin Cui     case ll_start:
127*01826a49SYabin Cui         stream->state = ll_run_bytes;
128*01826a49SYabin Cui         stream->saved = stream->seed;
129*01826a49SYabin Cui         stream->bytesLeft = MIN(kMatchBytes, value);
130*01826a49SYabin Cui     /* fall-through */
131*01826a49SYabin Cui     case ll_run_bytes:
132*01826a49SYabin Cui         while (stream->bytesLeft > 0 && op < oend) {
133*01826a49SYabin Cui             *op++ = SEQ_randByte(&stream->seed) | 0x80;
134*01826a49SYabin Cui             --stream->bytesLeft;
135*01826a49SYabin Cui         }
136*01826a49SYabin Cui         if (stream->bytesLeft > 0)
137*01826a49SYabin Cui             break;
138*01826a49SYabin Cui         /* State transition */
139*01826a49SYabin Cui         stream->state = ll_literals;
140*01826a49SYabin Cui         stream->bytesLeft = value - MIN(kMatchBytes, value);
141*01826a49SYabin Cui     /* fall-through */
142*01826a49SYabin Cui     case ll_literals:
143*01826a49SYabin Cui         while (stream->bytesLeft > 0 && op < oend) {
144*01826a49SYabin Cui             *op++ = SEQ_randByte(&stream->seed) & 0x7F;
145*01826a49SYabin Cui             --stream->bytesLeft;
146*01826a49SYabin Cui         }
147*01826a49SYabin Cui         if (stream->bytesLeft > 0)
148*01826a49SYabin Cui             break;
149*01826a49SYabin Cui         /* State transition */
150*01826a49SYabin Cui         stream->state = ll_run_match;
151*01826a49SYabin Cui         stream->bytesLeft = MIN(kMatchBytes, value);
152*01826a49SYabin Cui     /* fall-through */
153*01826a49SYabin Cui     case ll_run_match: {
154*01826a49SYabin Cui         while (stream->bytesLeft > 0 && op < oend) {
155*01826a49SYabin Cui             *op++ = SEQ_randByte(&stream->saved) | 0x80;
156*01826a49SYabin Cui             --stream->bytesLeft;
157*01826a49SYabin Cui         }
158*01826a49SYabin Cui         if (stream->bytesLeft > 0)
159*01826a49SYabin Cui             break;
160*01826a49SYabin Cui     }
161*01826a49SYabin Cui     /* fall-through */
162*01826a49SYabin Cui     default:
163*01826a49SYabin Cui         stream->state = 0;
164*01826a49SYabin Cui         stream->bytesLeft = 0;
165*01826a49SYabin Cui         break;
166*01826a49SYabin Cui     }
167*01826a49SYabin Cui     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
168*01826a49SYabin Cui     out->pos = op - ostart;
169*01826a49SYabin Cui     return stream->bytesLeft;
170*01826a49SYabin Cui }
171*01826a49SYabin Cui 
172*01826a49SYabin Cui /* Saves the current seed then generates kMatchBytes random bytes >= 128.
173*01826a49SYabin Cui  * Generates offset - kMatchBytes of zeros to get a large offset without
174*01826a49SYabin Cui  * polluting the hash tables.
175*01826a49SYabin Cui  * Generates another kMatchBytes using the saved seed to generate a with the
176*01826a49SYabin Cui  * required offset.
177*01826a49SYabin Cui  */
SEQ_gen_offset(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)178*01826a49SYabin Cui static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
179*01826a49SYabin Cui {
180*01826a49SYabin Cui     typedef enum {
181*01826a49SYabin Cui         of_start = 0,
182*01826a49SYabin Cui         of_run_bytes,
183*01826a49SYabin Cui         of_offset,
184*01826a49SYabin Cui         of_run_match,
185*01826a49SYabin Cui     } of_state;
186*01826a49SYabin Cui     BYTE* const ostart = (BYTE*)out->dst;
187*01826a49SYabin Cui     BYTE* const oend = ostart + out->size;
188*01826a49SYabin Cui     BYTE* op = ostart + out->pos;
189*01826a49SYabin Cui 
190*01826a49SYabin Cui     switch ((of_state)stream->state) {
191*01826a49SYabin Cui     case of_start:
192*01826a49SYabin Cui         stream->state = of_run_bytes;
193*01826a49SYabin Cui         stream->saved = stream->seed;
194*01826a49SYabin Cui         stream->bytesLeft = MIN(value, kMatchBytes);
195*01826a49SYabin Cui     /* fall-through */
196*01826a49SYabin Cui     case of_run_bytes: {
197*01826a49SYabin Cui         while (stream->bytesLeft > 0 && op < oend) {
198*01826a49SYabin Cui             *op++ = SEQ_randByte(&stream->seed) | 0x80;
199*01826a49SYabin Cui             --stream->bytesLeft;
200*01826a49SYabin Cui         }
201*01826a49SYabin Cui         if (stream->bytesLeft > 0)
202*01826a49SYabin Cui             break;
203*01826a49SYabin Cui         /* State transition */
204*01826a49SYabin Cui         stream->state = of_offset;
205*01826a49SYabin Cui         stream->bytesLeft = value - MIN(value, kMatchBytes);
206*01826a49SYabin Cui     }
207*01826a49SYabin Cui     /* fall-through */
208*01826a49SYabin Cui     case of_offset: {
209*01826a49SYabin Cui         /* Copy matchLength + 1 bytes to the output buffer */
210*01826a49SYabin Cui         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
211*01826a49SYabin Cui         if (setLength > 0) {
212*01826a49SYabin Cui             memset(op, 0, setLength);
213*01826a49SYabin Cui             op += setLength;
214*01826a49SYabin Cui             stream->bytesLeft -= setLength;
215*01826a49SYabin Cui         }
216*01826a49SYabin Cui         if (stream->bytesLeft > 0)
217*01826a49SYabin Cui             break;
218*01826a49SYabin Cui         /* State transition */
219*01826a49SYabin Cui         stream->state = of_run_match;
220*01826a49SYabin Cui         stream->bytesLeft = MIN(value, kMatchBytes);
221*01826a49SYabin Cui     }
222*01826a49SYabin Cui     /* fall-through */
223*01826a49SYabin Cui     case of_run_match: {
224*01826a49SYabin Cui         while (stream->bytesLeft > 0 && op < oend) {
225*01826a49SYabin Cui             *op++ = SEQ_randByte(&stream->saved) | 0x80;
226*01826a49SYabin Cui             --stream->bytesLeft;
227*01826a49SYabin Cui         }
228*01826a49SYabin Cui         if (stream->bytesLeft > 0)
229*01826a49SYabin Cui             break;
230*01826a49SYabin Cui     }
231*01826a49SYabin Cui     /* fall-through */
232*01826a49SYabin Cui     default:
233*01826a49SYabin Cui         stream->state = 0;
234*01826a49SYabin Cui         stream->bytesLeft = 0;
235*01826a49SYabin Cui         break;
236*01826a49SYabin Cui     }
237*01826a49SYabin Cui     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
238*01826a49SYabin Cui     out->pos = op - ostart;
239*01826a49SYabin Cui     return stream->bytesLeft;
240*01826a49SYabin Cui }
241*01826a49SYabin Cui 
242*01826a49SYabin Cui /* Returns the number of bytes left to generate.
243*01826a49SYabin Cui  * Must pass the same type/value until it returns 0.
244*01826a49SYabin Cui  */
SEQ_gen(SEQ_stream * stream,SEQ_gen_type type,unsigned value,SEQ_outBuffer * out)245*01826a49SYabin Cui size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
246*01826a49SYabin Cui {
247*01826a49SYabin Cui     switch (type) {
248*01826a49SYabin Cui         case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
249*01826a49SYabin Cui         case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
250*01826a49SYabin Cui         case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
251*01826a49SYabin Cui         case SEQ_gen_max: /* fall-through */
252*01826a49SYabin Cui         default: return 0;
253*01826a49SYabin Cui     }
254*01826a49SYabin Cui }
255*01826a49SYabin Cui 
256*01826a49SYabin Cui /* Returns the xxhash of the data produced so far */
SEQ_digest(SEQ_stream const * stream)257*01826a49SYabin Cui XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
258*01826a49SYabin Cui {
259*01826a49SYabin Cui     return XXH64_digest(&stream->xxh);
260*01826a49SYabin Cui }
261