1 /*
2 LZ4io.c - LZ4 File/Stream Interface
3 Copyright (C) Yann Collet 2011-2024
4
5 GPL v2 License
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21 You can contact the author at :
22 - LZ4 source repository : https://github.com/lz4/lz4
23 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
24 */
25 /*
  Note : this is a stand-alone program.
27 It is not part of LZ4 compression library, it is a user code of the LZ4 library.
28 - The license of LZ4 library is BSD.
29 - The license of xxHash library is BSD.
30 - The license of this source file is GPLv2.
31 */
32
33
34 /*-************************************
35 * Compiler options
36 **************************************/
/* Visual Studio only : silence warning C4127 for this translation unit */
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
#endif
/* MinGW only : define _POSIX_SOURCE, unless the build already provides it */
#if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
#  define _POSIX_SOURCE 1          /* disable %llu warnings with MinGW on Windows */
#endif
43
44
45 /*****************************
46 * Includes
47 *****************************/
48 #include "platform.h" /* Large File Support, SET_BINARY_MODE, SET_SPARSE_FILE_MODE, PLATFORM_POSIX_VERSION, __64BIT__ */
49 #include "timefn.h" /* TIME_ */
50 #include "util.h" /* UTIL_getFileStat, UTIL_setFileStat */
51 #include <stdio.h> /* fprintf, fopen, fread, stdin, stdout, fflush, getchar */
52 #include <stdlib.h> /* malloc, free */
53 #include <string.h> /* strerror, strcmp, strlen */
54 #include <time.h> /* clock_t, for cpu-time */
55 #include <sys/types.h> /* stat64 */
56 #include <sys/stat.h> /* stat64 */
57 #include "lz4conf.h" /* compile-time constants */
58 #include "lz4io.h"
59 #include "lz4.h" /* required for legacy format */
60 #include "lz4hc.h" /* required for legacy format */
61 #define LZ4F_STATIC_LINKING_ONLY
62 #include "lz4frame.h" /* LZ4F_* */
63 #include "xxhash.h" /* frame checksum (MT mode) */
64
65
66 /*****************************
67 * Constants
68 *****************************/
/* Size suffixes : written after a number, e.g. `64 KB` expands to `64 *(1 <<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* Bit masks covering the lowest 1, 2, 3, 4 and 8 bits */
#define _1BIT  0x01
#define _2BITS 0x03
#define _3BITS 0x07
#define _4BITS 0x0F
#define _8BITS 0xFF

#define MAGICNUMBER_SIZE    4            /* size in bytes of a magic number, as written in the archive header */
#define LZ4IO_MAGICNUMBER   0x184D2204
#define LZ4IO_SKIPPABLE0    0x184D2A50
#define LZ4IO_SKIPPABLEMASK 0xFFFFFFF0   /* applied to a magic number before comparing it with LZ4IO_SKIPPABLE0 */
#define LEGACY_MAGICNUMBER  0x184C2102   /* header of archives produced by the legacy compressor */

#define CACHELINE 64
#define LEGACY_BLOCKSIZE   (8 MB)        /* amount of input read per block by the legacy compressor */
#define MIN_STREAM_BUFSIZE (192 KB)
#define LZ4IO_BLOCKSIZEID_DEFAULT 7      /* initial value of prefs->blockSizeId */
#define LZ4_MAX_DICT_SIZE (64 KB)

/* MIN() : evaluates its arguments more than once; do not pass expressions with side effects */
#undef MIN
#define MIN(a,b)  ((a)<(b)?(a):(b))
93
94 /**************************************
95 * Time and Display
96 **************************************/
/* DISPLAY() writes to stderr, DISPLAYOUT() writes to stdout */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
#define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
/* DISPLAYLEVEL() : print to stderr when the notification level is at least `l`;
 * stderr is flushed when the level is 4 or more.
 * Note : expands to a bare `if` statement, not to an expression. */
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); if (g_displayLevel>=4) fflush(stderr); }
static int g_displayLevel = 0;   /* 0 : no display  ; 1: errors  ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */

/* DISPLAYUPDATE() : same as DISPLAYLEVEL(), but only prints when more than `refreshRate`
 * has elapsed since the previous update (tracked in `g_time`), or when the level is 4 or more. */
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
            if ( (TIME_clockSpan_ns(g_time) > refreshRate)  \
              || (g_displayLevel>=4) ) {                    \
                g_time = TIME_getTime();                    \
                DISPLAY(__VA_ARGS__);                       \
                if (g_displayLevel>=4) fflush(stderr);      \
        }   }
static const Duration_ns refreshRate = 200000000;   /* in nanoseconds, i.e. 200 ms */
static TIME_t g_time = { 0 };                       /* time of the last DISPLAYUPDATE() output */
111
/* cpuLoad_sec() :
 * @return : CPU time consumed, in seconds.
 *   - Windows : kernel + user time of the whole process, as reported by GetProcessTimes();
 *               `cpuStart` is ignored. Returns 0 if the query fails.
 *   - others  : clock() time elapsed since `cpuStart`.
 */
static double cpuLoad_sec(clock_t cpuStart)
{
#ifdef _WIN32
    FILETIME creationTime, exitTime, kernelTime, userTime;
    (void)cpuStart;
    if (!GetProcessTimes(GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime))
        return 0.;
    /* A FILETIME is a 64-bit count of 100 ns ticks, split into two 32-bit halves.
     * Both halves are needed : the low half alone wraps after ~429 s of CPU time. */
    {   unsigned long long const kernelTicks =
                ((unsigned long long)kernelTime.dwHighDateTime << 32) | (unsigned long long)kernelTime.dwLowDateTime;
        unsigned long long const userTicks =
                ((unsigned long long)userTime.dwHighDateTime << 32) | (unsigned long long)userTime.dwLowDateTime;
        return ((double)kernelTicks + (double)userTicks) * 100. / 1000000000.;
    }
#else
    return (double)(clock() - cpuStart) / CLOCKS_PER_SEC;
#endif
}
125
/* LZ4IO_finalTimeDisplay() :
 * Prints (at notification level 3) the elapsed wall-clock time since `timeStart`,
 * the throughput for `size` bytes, and the CPU load derived from `cpuStart`.
 * In multithreaded builds, nothing is measured when the timer
 * does not support multithreaded measurements.
 */
static void LZ4IO_finalTimeDisplay(TIME_t timeStart, clock_t cpuStart, unsigned long long size)
{
#if LZ4IO_MULTITHREAD
    if (!TIME_support_MT_measurements()) {
        DISPLAYLEVEL(5, "time measurements not compatible with multithreading \n");
        return;
    }
#endif
    {   Duration_ns const elapsed_ns = TIME_clockSpan_ns(timeStart);
        /* a zero duration is bumped to 1 ns, so that the divisions below remain defined */
        double const seconds = (double)(elapsed_ns + (elapsed_ns == 0)) / (double)1000000000.;
        double const cpuLoad_s = cpuLoad_sec(cpuStart);
        double const mibPerSec = (double)size / seconds / 1024. / 1024.;
        double const cpuPercent = (cpuLoad_s / seconds) * 100.;
        DISPLAYLEVEL(3,"Done in %.2f s ==> %.2f MiB/s (cpu load : %.0f%%)\n",
                    seconds, mibPerSec, cpuPercent);
    }
}
142
143 /**************************************
144 * Exceptions
145 ***************************************/
#ifndef DEBUG
#  define DEBUG 0
#endif
/* DEBUGOUTPUT() : prints to stderr only when DEBUG is non-zero */
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* END_PROCESS() : fatal error.
 * Prints "Error <code> : <message>" at notification level 1 and above,
 * flushes all open output streams, then terminates the process with exit(error).
 * Expands to a braced block, so it compiles with or without a trailing semicolon. */
#define END_PROCESS(error, ...)                                   \
{                                                                 \
    DEBUGOUTPUT("Error in %s, line %i : \n",  __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error);                        \
    DISPLAYLEVEL(1, __VA_ARGS__);                                 \
    DISPLAYLEVEL(1, " \n");                                       \
    fflush(NULL);                                                 \
    exit(error);                                                  \
}

/* LZ4IO_STATIC_ASSERT() : compile-time check;
 * a false condition produces a division by zero inside an enum initializer. */
#define LZ4IO_STATIC_ASSERT(c)   { enum { LZ4IO_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
161
162
163 /* ************************************************** */
164 /* ****************** Init functions ******************** */
165 /* ************************************************** */
166
/* LZ4IO_defaultNbWorkers() :
 * @return : default number of worker threads.
 *   Multithreaded builds : number of cores, minus one spared core,
 *   minus one more per group of 8 cores; never less than 1.
 *   Other builds : always 1.
 */
int LZ4IO_defaultNbWorkers(void)
{
#if LZ4IO_MULTITHREAD
    int const coreCount = UTIL_countCores();
    int const reserved = 1 + ((unsigned)coreCount >> 3);
    return (coreCount > reserved) ? (coreCount - reserved) : 1;
#else
    return 1;
#endif
}
178
179 /* ************************************************** */
180 /* ****************** Parameters ******************** */
181 /* ************************************************** */
182
/* User preferences, created by LZ4IO_defaultPreferences()
 * and modified through the LZ4IO_set*() functions below. */
struct LZ4IO_prefs_s {
    int passThrough;                 /* 0/1 ; default 0 */
    int overwrite;                   /* 0/1 ; default 1 ; when 0, an existing destination requires confirmation */
    int testMode;                    /* 0/1 ; default 0 */
    int blockSizeId;                 /* 4-7 ; default LZ4IO_BLOCKSIZEID_DEFAULT */
    size_t blockSize;                /* in bytes ; default 0 */
    int blockChecksum;               /* 0/1 ; default 0 */
    int streamChecksum;              /* 0/1 ; default 1 */
    int blockIndependence;           /* 1 == independent blocks (default) */
    int sparseFileSupport;           /* 0 == disabled ; 1 == auto (default) ; 2 == force enable */
    int contentSizeFlag;             /* 0/1 ; default 0 */
    int useDictionary;               /* 1 when dictionaryFilename is set (non-NULL) */
    unsigned favorDecSpeed;          /* 0/1 ; default 0 */
    const char* dictionaryFilename;  /* stored as a pointer, the string is not copied */
    int removeSrcFile;               /* 0/1 ; default 0 */
    int nbWorkers;                   /* at least 1 ; default LZ4IO_defaultNbWorkers() */
};
200
/* LZ4IO_freePreferences() :
 * Releases a preferences object obtained from LZ4IO_defaultPreferences().
 * The object is a single allocation, so one free() is enough. */
void LZ4IO_freePreferences(LZ4IO_prefs_t* prefs)
{
    void* const allocation = prefs;
    free(allocation);
}
205
LZ4IO_defaultPreferences(void)206 LZ4IO_prefs_t* LZ4IO_defaultPreferences(void)
207 {
208 LZ4IO_prefs_t* const prefs = (LZ4IO_prefs_t*)malloc(sizeof(*prefs));
209 if (!prefs) END_PROCESS(11, "Can't even allocate LZ4IO preferences");
210 prefs->passThrough = 0;
211 prefs->overwrite = 1;
212 prefs->testMode = 0;
213 prefs->blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
214 prefs->blockSize = 0;
215 prefs->blockChecksum = 0;
216 prefs->streamChecksum = 1;
217 prefs->blockIndependence = 1;
218 prefs->sparseFileSupport = 1;
219 prefs->contentSizeFlag = 0;
220 prefs->useDictionary = 0;
221 prefs->favorDecSpeed = 0;
222 prefs->dictionaryFilename = NULL;
223 prefs->removeSrcFile = 0;
224 prefs->nbWorkers = LZ4IO_defaultNbWorkers();
225 return prefs;
226 }
227
/* LZ4IO_setNbWorkers() :
 * Stores the requested number of workers, clamped to [1, LZ4_NBWORKERS_MAX].
 * @return : the value actually stored.
 */
int LZ4IO_setNbWorkers(LZ4IO_prefs_t* const prefs, int nbWorkers)
{
    int clamped = nbWorkers;
    if (clamped < 1) {
        clamped = 1;
    }
    if (!(clamped < LZ4_NBWORKERS_MAX)) {
        clamped = LZ4_NBWORKERS_MAX;
    }
    prefs->nbWorkers = clamped;
    return clamped;
}
235
/* LZ4IO_setDictionaryFilename() :
 * Records the dictionary file name (pointer only, the string is not copied).
 * Dictionary usage is enabled when the name is non-NULL, disabled otherwise.
 * @return : 1 if a dictionary is now in use, 0 otherwise.
 */
int LZ4IO_setDictionaryFilename(LZ4IO_prefs_t* const prefs, const char* dictionaryFilename)
{
    int const enabled = (dictionaryFilename != NULL);
    prefs->useDictionary = enabled;
    prefs->dictionaryFilename = dictionaryFilename;
    return enabled;
}
242
243 /* Default setting : passThrough = 0; return : passThrough mode (0/1) */
LZ4IO_setPassThrough(LZ4IO_prefs_t * const prefs,int yes)244 int LZ4IO_setPassThrough(LZ4IO_prefs_t* const prefs, int yes)
245 {
246 prefs->passThrough = (yes!=0);
247 return prefs->passThrough;
248 }
249
250 /* Default setting : overwrite = 1; return : overwrite mode (0/1) */
LZ4IO_setOverwrite(LZ4IO_prefs_t * const prefs,int yes)251 int LZ4IO_setOverwrite(LZ4IO_prefs_t* const prefs, int yes)
252 {
253 prefs->overwrite = (yes!=0);
254 return prefs->overwrite;
255 }
256
257 /* Default setting : testMode = 0; return : testMode (0/1) */
LZ4IO_setTestMode(LZ4IO_prefs_t * const prefs,int yes)258 int LZ4IO_setTestMode(LZ4IO_prefs_t* const prefs, int yes)
259 {
260 prefs->testMode = (yes!=0);
261 return prefs->testMode;
262 }
263
264 /* blockSizeID : valid values : 4-5-6-7 */
LZ4IO_setBlockSizeID(LZ4IO_prefs_t * const prefs,unsigned bsid)265 size_t LZ4IO_setBlockSizeID(LZ4IO_prefs_t* const prefs, unsigned bsid)
266 {
267 static const size_t blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB };
268 static const unsigned minBlockSizeID = 4;
269 static const unsigned maxBlockSizeID = 7;
270 if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0;
271 prefs->blockSizeId = (int)bsid;
272 prefs->blockSize = blockSizeTable[(unsigned)prefs->blockSizeId-minBlockSizeID];
273 return prefs->blockSize;
274 }
275
/* LZ4IO_setBlockSize() :
 * Stores `blockSize`, clamped to [32 bytes, 4 MB], and derives the matching
 * block size ID : the smallest of { 64 KB, 256 KB, 1 MB, 4 MB } able to hold it.
 * @return : the block size actually stored.
 */
size_t LZ4IO_setBlockSize(LZ4IO_prefs_t* const prefs, size_t blockSize)
{
    size_t const lowerBound = 32;
    size_t const upperBound = 4 MB;
    size_t remaining;
    unsigned nbShifts = 0;

    if (blockSize < lowerBound) blockSize = lowerBound;
    if (blockSize > upperBound) blockSize = upperBound;
    prefs->blockSize = blockSize;

    /* count how many times (blockSize-1) can be divided by 4 before reaching 0 */
    for (remaining = (blockSize - 1) >> 2; remaining != 0; remaining >>= 2) {
        nbShifts++;
    }
    if (nbShifts < 7) nbShifts = 7;   /* 64 KB is the smallest ID */
    prefs->blockSizeId = (int)(nbShifts - 3);
    return prefs->blockSize;
}
292
293 /* Default setting : 1 == independent blocks */
LZ4IO_setBlockMode(LZ4IO_prefs_t * const prefs,LZ4IO_blockMode_t blockMode)294 int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode)
295 {
296 prefs->blockIndependence = (blockMode == LZ4IO_blockIndependent);
297 return prefs->blockIndependence;
298 }
299
300 /* Default setting : 0 == no block checksum */
LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t * const prefs,int enable)301 int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
302 {
303 prefs->blockChecksum = (enable != 0);
304 return prefs->blockChecksum;
305 }
306
307 /* Default setting : 1 == checksum enabled */
LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t * const prefs,int enable)308 int LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
309 {
310 prefs->streamChecksum = (enable != 0);
311 return prefs->streamChecksum;
312 }
313
314 /* Default setting : 0 (no notification) */
LZ4IO_setNotificationLevel(int level)315 int LZ4IO_setNotificationLevel(int level)
316 {
317 g_displayLevel = level;
318 return g_displayLevel;
319 }
320
321 /* Default setting : 1 (auto: enabled on file, disabled on stdout) */
LZ4IO_setSparseFile(LZ4IO_prefs_t * const prefs,int enable)322 int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable)
323 {
324 prefs->sparseFileSupport = 2*(enable!=0); /* 2==force enable */
325 return prefs->sparseFileSupport;
326 }
327
328 /* Default setting : 0 (disabled) */
LZ4IO_setContentSize(LZ4IO_prefs_t * const prefs,int enable)329 int LZ4IO_setContentSize(LZ4IO_prefs_t* const prefs, int enable)
330 {
331 prefs->contentSizeFlag = (enable!=0);
332 return prefs->contentSizeFlag;
333 }
334
335 /* Default setting : 0 (disabled) */
LZ4IO_favorDecSpeed(LZ4IO_prefs_t * const prefs,int favor)336 void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor)
337 {
338 prefs->favorDecSpeed = (favor!=0);
339 }
340
/* LZ4IO_setRemoveSrcFile() :
 * Any non-zero `flag` requests removal of the source file; 0 disables it. */
void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag)
{
    if (flag != 0) {
        prefs->removeSrcFile = 1;
    } else {
        prefs->removeSrcFile = 0;
    }
}
345
346
347 /* ************************************************************************ **
348 ** ********************** String functions ********************* **
349 ** ************************************************************************ */
350
LZ4IO_isDevNull(const char * s)351 static int LZ4IO_isDevNull(const char* s)
352 {
353 return UTIL_sameString(s, nulmark);
354 }
355
LZ4IO_isStdin(const char * s)356 static int LZ4IO_isStdin(const char* s)
357 {
358 return UTIL_sameString(s, stdinmark);
359 }
360
LZ4IO_isStdout(const char * s)361 static int LZ4IO_isStdout(const char* s)
362 {
363 return UTIL_sameString(s, stdoutmark);
364 }
365
366
367 /* ************************************************************************ **
368 ** ********************** LZ4 File / Pipe compression ********************* **
369 ** ************************************************************************ */
370
LZ4IO_isSkippableMagicNumber(unsigned int magic)371 static int LZ4IO_isSkippableMagicNumber(unsigned int magic) {
372 return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0;
373 }
374
375
/** LZ4IO_openSrcFile() :
 *  condition : `srcFileName` must be non-NULL.
 *  stdin is switched to binary mode and returned when `srcFileName` designates it;
 *  directories are refused with a message.
 * @result : FILE* to `srcFileName`, or NULL if it fails */
static FILE* LZ4IO_openSrcFile(const char* srcFileName)
{
    if (LZ4IO_isStdin(srcFileName)) {
        DISPLAYLEVEL(4,"Using stdin for input \n");
        SET_BINARY_MODE(stdin);
        return stdin;
    }

    if (UTIL_isDirectory(srcFileName)) {
        DISPLAYLEVEL(1, "lz4: %s is a directory -- ignored \n", srcFileName);
        return NULL;
    }

    {   FILE* const src = fopen(srcFileName, "rb");
        if (src == NULL) {
            DISPLAYLEVEL(1, "%s: %s \n", srcFileName, strerror(errno));
        }
        return src;
    }
}
399
400 /** FIO_openDstFile() :
401 * prefs is writable, because sparseFileSupport might be updated.
402 * condition : `dstFileName` must be non-NULL.
403 * @result : FILE* to `dstFileName`, or NULL if it fails */
404 static FILE*
LZ4IO_openDstFile(const char * dstFileName,const LZ4IO_prefs_t * const prefs)405 LZ4IO_openDstFile(const char* dstFileName, const LZ4IO_prefs_t* const prefs)
406 {
407 FILE* f;
408 assert(dstFileName != NULL);
409
410 if (LZ4IO_isStdout(dstFileName)) {
411 DISPLAYLEVEL(4, "Using stdout for output \n");
412 f = stdout;
413 SET_BINARY_MODE(stdout);
414 if (prefs->sparseFileSupport==1) {
415 DISPLAYLEVEL(4, "Sparse File Support automatically disabled on stdout ;"
416 " to force-enable it, add --sparse command \n");
417 }
418 } else {
419 if (!prefs->overwrite && !LZ4IO_isDevNull(dstFileName)) {
420 /* Check if destination file already exists */
421 FILE* const testf = fopen( dstFileName, "rb" );
422 if (testf != NULL) { /* dest exists, prompt for overwrite authorization */
423 fclose(testf);
424 if (g_displayLevel <= 1) { /* No interaction possible */
425 DISPLAY("%s already exists; not overwritten \n", dstFileName);
426 return NULL;
427 }
428 DISPLAY("%s already exists; do you want to overwrite (y/N) ? ", dstFileName);
429 { int ch = getchar();
430 if ((ch!='Y') && (ch!='y')) {
431 DISPLAY(" not overwritten \n");
432 return NULL;
433 }
434 while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */
435 } } }
436 f = fopen( dstFileName, "wb" );
437 if (f==NULL) DISPLAYLEVEL(1, "%s: %s\n", dstFileName, strerror(errno));
438 }
439
440 /* sparse file */
441 { int const sparseMode = (prefs->sparseFileSupport - (f==stdout)) > 0;
442 if (f && sparseMode) { SET_SPARSE_FILE_MODE(f); }
443 }
444
445 return f;
446 }
447
448
449 /***************************************
450 * MT I/O
451 ***************************************/
452
453 #include "threadpool.h"
454
/* Describes one compressed block waiting to be written */
typedef struct {
    void* buf;                 /* heap buffer holding the block; NULL marks an unused slot in a WriteRegister */
    size_t size;               /* number of bytes to write from `buf` */
    unsigned long long rank;   /* block number, used to restore the write order */
} BufferDesc;
460
/* Holds blocks that completed out of order, until their turn to be written comes */
typedef struct {
    unsigned long long expectedRank;   /* rank of the next block to write */
    BufferDesc* buffers;               /* `capacity` slots; stored entries are packed at the front, the first NULL `buf` ends them */
    size_t capacity;                   /* number of slots in `buffers` */
    size_t blockSize;                  /* input size per block; used for the progress display */
    unsigned long long totalCSize;     /* number of compressed bytes written so far */
} WriteRegister;
468
/* WR_destroy() :
 * Releases the descriptor array of the register.
 * The buffers referenced by the descriptors are not released here. */
static void WR_destroy(WriteRegister* wr)
{
    BufferDesc* const slots = wr->buffers;
    free(slots);
}
473
474 #define WR_INITIAL_BUFFER_POOL_SIZE 16
475 /* Note: WR_init() can fail (allocation)
476 * check that wr->buffers!= NULL for success */
WR_init(size_t blockSize)477 static WriteRegister WR_init(size_t blockSize)
478 {
479 WriteRegister wr = { 0, NULL, WR_INITIAL_BUFFER_POOL_SIZE, 0, 0 };
480 wr.buffers = (BufferDesc*)calloc(1, WR_INITIAL_BUFFER_POOL_SIZE * sizeof(BufferDesc));
481 wr.blockSize = blockSize;
482 return wr;
483 }
484
/* WR_addBufDesc() :
 * Stores a copy of `bd` in the first free slot of the register.
 * When the last slot is occupied, the register is considered full and is
 * first extended (doubling, by at most 256 slots at a time).
 * Terminates the process (error 39) if the extension cannot be allocated. */
static void WR_addBufDesc(WriteRegister* wr, const BufferDesc* bd)
{
    size_t const curCapacity = wr->capacity;

    if (wr->buffers[curCapacity-1].buf == NULL) {
        /* at least one position (the last one) is free : take the first free one */
        size_t slot = 0;
        while ((slot < curCapacity) && (wr->buffers[slot].buf != NULL)) {
            slot++;
        }
        assert(slot != curCapacity);
        wr->buffers[slot] = *bd;
        return;
    }

    /* register is full : extend it, then use the first new slot */
    {   size_t const growth = MIN(curCapacity, 256);
        size_t const grownCapacity = curCapacity + growth;
        BufferDesc* const grown = (BufferDesc*)realloc(wr->buffers, grownCapacity * sizeof(BufferDesc));
        if (grown == NULL) {
            END_PROCESS(39, "cannot extend register of buffers");
        }
        memset(grown + curCapacity, 0, growth * sizeof(BufferDesc));
        grown[curCapacity] = *bd;
        wr->buffers = grown;
        wr->capacity = grownCapacity;
    }
}
513
/* WR_isPresent() :
 * @return : 1 if a block of rank `id` is stored in the register, 0 otherwise.
 * The scan stops at the first unused slot. */
static int WR_isPresent(WriteRegister* wr, unsigned long long id)
{
    const BufferDesc* cur = wr->buffers;
    const BufferDesc* const end = cur + wr->capacity;
    for ( ; (cur < end) && (cur->buf != NULL); cur++) {
        if (cur->rank == id) return 1;
    }
    return 0;
}
527
528 /* Note: requires @id to exist! */
WR_getBufID(WriteRegister * wr,unsigned long long id)529 static BufferDesc WR_getBufID(WriteRegister* wr, unsigned long long id)
530 {
531 size_t n;
532 for (n=0; n<wr->capacity; n++) {
533 if (wr->buffers[n].buf == NULL) {
534 /* no more buffers stored */
535 break;
536 }
537 if (wr->buffers[n].rank == id)
538 return wr->buffers[n];
539 }
540 END_PROCESS(41, "buffer ID not found");
541 }
542
/* WR_removeBuffID() :
 * Frees the buffer of the block of rank `id`, removes its descriptor,
 * and shifts the following descriptors down so that stored entries stay contiguous.
 * Does nothing when `id` is not present. */
static void WR_removeBuffID(WriteRegister* wr, unsigned long long id)
{
    size_t const capacity = wr->capacity;
    size_t pos;

    for (pos=0; pos<capacity; pos++) {
        if (wr->buffers[pos].buf == NULL) {
            /* no more buffers stored : `id` is absent */
            return;
        }
        if (wr->buffers[pos].rank == id) break;
    }
    if (pos == capacity) {
        /* register is full and `id` is absent : nothing to remove.
         * Without this guard, the last descriptor would be erased
         * and its buffer leaked. */
        return;
    }
    free(wr->buffers[pos].buf);

    /* overwrite buffer descriptor, scale others down;
     * stop once the first unused slot has been copied */
    for (pos++; pos < capacity; pos++) {
        wr->buffers[pos-1] = wr->buffers[pos];
        if (wr->buffers[pos].buf == NULL)
            return;
    }
    /* the register was full : the last slot is now free */
    {   BufferDesc const nullBd = { NULL, 0, 0 };
        wr->buffers[capacity-1] = nullBd;
    }
}
567
/* Argument of LZ4IO_checkWriteOrder() : one compressed block ready to be written.
 * The descriptor is heap-allocated and freed by LZ4IO_checkWriteOrder(). */
typedef struct {
    WriteRegister* wr;            /* register shared by all write jobs of the same file */
    void* cBuf;                   /* compressed block; ownership is transferred to the write job */
    size_t cSize;                 /* size of the compressed block, in bytes */
    unsigned long long blockNb;   /* rank of the block */
    FILE* out;                    /* destination stream */
} WriteJobDesc;
575
/* LZ4IO_writeBuffer() :
 * Writes the whole content of `bufDesc` into `out`.
 * Terminates the process (error 38) on incomplete write. */
static void LZ4IO_writeBuffer(BufferDesc bufDesc, FILE* out)
{
    size_t const written = fwrite(bufDesc.buf, 1, bufDesc.size, out);
    if (written != bufDesc.size) {
        END_PROCESS(38, "Write error : cannot write compressed block");
    }
}
583
LZ4IO_checkWriteOrder(void * arg)584 static void LZ4IO_checkWriteOrder(void* arg)
585 {
586 WriteJobDesc* const wjd = (WriteJobDesc*)arg;
587 size_t const cSize = wjd->cSize;
588 WriteRegister* const wr = wjd->wr;
589
590 if (wjd->blockNb != wr->expectedRank) {
591 /* incorrect order : let's store this buffer for later write */
592 BufferDesc bd;
593 bd.buf = wjd->cBuf;
594 bd.size = wjd->cSize;
595 bd.rank = wjd->blockNb;
596 WR_addBufDesc(wr, &bd);
597 free(wjd); /* because wjd is pod */
598 return;
599 }
600
601 /* expected block ID : let's write this block */
602 { BufferDesc bd;
603 bd.buf = wjd->cBuf;
604 bd.size = wjd->cSize;
605 bd.rank = wjd->blockNb;
606 LZ4IO_writeBuffer(bd, wjd->out);
607 }
608 wr->expectedRank++;
609 wr->totalCSize += cSize;
610 free(wjd->cBuf);
611 /* and check for more blocks, previously saved */
612 while (WR_isPresent(wr, wr->expectedRank)) {
613 BufferDesc const bd = WR_getBufID(wr, wr->expectedRank);
614 LZ4IO_writeBuffer(bd, wjd->out);
615 wr->totalCSize += bd.size;
616 WR_removeBuffID(wr, wr->expectedRank);
617 wr->expectedRank++;
618 }
619 free(wjd); /* because wjd is pod */
620 { unsigned long long const processedSize = (unsigned long long)(wr->expectedRank-1) * wr->blockSize;
621 DISPLAYUPDATE(2, "\rRead : %u MiB ==> %.2f%% ",
622 (unsigned)(processedSize >> 20),
623 (double)wr->totalCSize / (double)processedSize * 100.);
624 }
625 }
626
/* compress_f : signature of a block compression function.
 * Compresses `srcSize` bytes from `src` into `dst` (capacity `dstCapacity`).
 * `parameters` is an opaque, function-specific state.
 * `prefixSize` is the amount of previous data located just before `src`.
 * The returned value is used by the caller as the number of bytes to write from `dst`. */
typedef size_t (*compress_f)(
    const void* parameters,
    void* dst,
    size_t dstCapacity,
    const void* src,
    size_t srcSize,
    size_t prefixSize);
634
/* Argument of LZ4IO_compressChunk() : one chunk of input to compress */
typedef struct {
    TPool* wpool;                     /* pool receiving the resulting write job */
    void* buffer;                     /* `prefixSize` bytes of prefix, followed by `inSize` bytes of input */
    size_t prefixSize;
    size_t inSize;
    unsigned long long blockNb;       /* rank of this chunk */
    compress_f compress;              /* compression function */
    const void* compressParameters;   /* passed as first argument of `compress` */
    FILE* fout;                       /* forwarded to the write job */
    WriteRegister* wr;                /* forwarded to the write job */
    size_t maxCBlockSize;             /* size of the output buffer allocated for this chunk */
    int lastBlock;                    /* set when the read was shorter than the chunk size */
} CompressJobDesc;
648
LZ4IO_compressChunk(void * arg)649 static void LZ4IO_compressChunk(void* arg)
650 {
651 CompressJobDesc* const cjd = (CompressJobDesc*)arg;
652 size_t const outCapacity = cjd->maxCBlockSize;
653 void* const out_buff = malloc(outCapacity);
654 if (!out_buff)
655 END_PROCESS(33, "Allocation error : can't allocate output buffer to compress new chunk");
656 { char* const inBuff = (char*)cjd->buffer + cjd->prefixSize;
657 size_t const cSize = cjd->compress(cjd->compressParameters, out_buff, outCapacity, inBuff, cjd->inSize, cjd->prefixSize);
658
659 /* check for write */
660 { WriteJobDesc* const wjd = (WriteJobDesc*)malloc(sizeof(*wjd));
661 if (wjd == NULL) {
662 END_PROCESS(35, "Allocation error : can't describe new write job");
663 }
664 wjd->cBuf = out_buff;
665 wjd->cSize = (size_t)cSize;
666 wjd->blockNb = cjd->blockNb;
667 wjd->out = cjd->fout;
668 wjd->wr = cjd->wr;
669 TPool_submitJob(cjd->wpool, LZ4IO_checkWriteOrder, wjd);
670 } }
671 }
672
LZ4IO_compressAndFreeChunk(void * arg)673 static void LZ4IO_compressAndFreeChunk(void* arg)
674 {
675 CompressJobDesc* const cjd = (CompressJobDesc*)arg;
676 LZ4IO_compressChunk(arg);
677 /* clean up */
678 free(cjd->buffer);
679 free(cjd); /* because cjd is pod */
680 }
681
/* one ReadTracker per file to compress */
typedef struct {
    TPool* tPool;                      /* pool running the read and compression jobs */
    TPool* wpool;                      /* pool running the write jobs */
    FILE* fin;                         /* input stream */
    size_t chunkSize;                  /* amount of input requested per read */
    unsigned long long totalReadSize;  /* number of input bytes read so far */
    unsigned long long blockNb;        /* rank of the chunk being read */
    XXH32_state_t* xxh32;              /* when non-NULL, updated with every chunk read */
    compress_f compress;               /* forwarded to each compression job */
    const void* compressParameters;    /* forwarded to each compression job */
    void* prefix;                      /* if it exists, assumed to be filled with 64 KB */
    FILE* fout;                        /* forwarded to each compression job */
    WriteRegister* wr;                 /* forwarded to each compression job */
    size_t maxCBlockSize;              /* forwarded to each compression job */
} ReadTracker;
698
LZ4IO_readAndProcess(void * arg)699 static void LZ4IO_readAndProcess(void* arg)
700 {
701 ReadTracker* const rjd = (ReadTracker*)arg;
702 size_t const chunkSize = rjd->chunkSize;
703 size_t const prefixSize = (rjd->prefix != NULL) * 64 KB;
704 size_t const bufferSize = chunkSize + prefixSize;
705 void* const buffer = malloc(bufferSize);
706 if (!buffer)
707 END_PROCESS(31, "Allocation error : can't allocate buffer to read new chunk");
708 if (prefixSize) {
709 assert(prefixSize == 64 KB);
710 memcpy(buffer, rjd->prefix, prefixSize);
711 }
712 { char* const in_buff = (char*)buffer + prefixSize;
713 size_t const inSize = fread(in_buff, (size_t)1, chunkSize, rjd->fin);
714 if (inSize > chunkSize) {
715 END_PROCESS(32, "Read error (read %u > %u [chunk size])", (unsigned)inSize, (unsigned)chunkSize);
716 }
717 rjd->totalReadSize += inSize;
718 /* special case: nothing left: stop read operation */
719 if (inSize == 0) {
720 free(buffer);
721 return;
722 }
723 /* process read input */
724 { CompressJobDesc* const cjd = (CompressJobDesc*)malloc(sizeof(*cjd));
725 if (cjd==NULL) {
726 END_PROCESS(33, "Allocation error : can't describe new compression job");
727 }
728 if (rjd->xxh32) {
729 XXH32_update(rjd->xxh32, in_buff, inSize);
730 }
731 if (rjd->prefix) {
732 /* dependent blocks mode */
733 memcpy(rjd->prefix, in_buff + inSize - 64 KB, 64 KB);
734 }
735 cjd->wpool = rjd->wpool;
736 cjd->buffer = buffer; /* transfer ownership */
737 cjd->prefixSize = prefixSize;
738 cjd->inSize = inSize;
739 cjd->blockNb = rjd->blockNb;
740 cjd->compress = rjd->compress;
741 cjd->compressParameters = rjd->compressParameters;
742 cjd->fout = rjd->fout;
743 cjd->wr = rjd->wr;
744 cjd->maxCBlockSize = rjd->maxCBlockSize;
745 cjd->lastBlock = inSize < chunkSize;
746 TPool_submitJob(rjd->tPool, LZ4IO_compressAndFreeChunk, cjd);
747 if (inSize == chunkSize) {
748 /* likely more => read another chunk */
749 rjd->blockNb++;
750 TPool_submitJob(rjd->tPool, LZ4IO_readAndProcess, rjd);
751 } } }
752 }
753
754
755 /***************************************
756 * Legacy Compression
757 ***************************************/
758
/* Size in bytes of a legacy block header in little-endian format.
 * The header stores the compressed size of the block that follows it. */
#define LZ4IO_LEGACY_BLOCK_HEADER_SIZE 4
#define LZ4IO_LEGACY_BLOCK_SIZE_MAX  (8 MB)
762
/* LZ4IO_writeLE32() :
 * Stores the 32 lowest bits of `value32` at `p`, least significant byte first.
 * Byte-by-byte version : independent from host endianness and from alignment of `p`. */
static void LZ4IO_writeLE32 (void* p, unsigned value32)
{
    unsigned char* const out = (unsigned char*)p;
    int byteNb;
    for (byteNb = 0; byteNb < 4; byteNb++) {
        out[byteNb] = (unsigned char)(value32 >> (8 * byteNb));
    }
}
772
773
/* Parameters handed to the legacy block compression functions */
typedef struct {
    int cLevel;   /* compression level; negative values are turned into an acceleration factor by the fast variant */
} CompressLegacyState;
777
LZ4IO_compressBlockLegacy_fast(const void * params,void * dst,size_t dstCapacity,const void * src,size_t srcSize,size_t prefixSize)778 static size_t LZ4IO_compressBlockLegacy_fast(
779 const void* params,
780 void* dst,
781 size_t dstCapacity,
782 const void* src,
783 size_t srcSize,
784 size_t prefixSize
785 )
786 {
787 const CompressLegacyState* const clevel = (const CompressLegacyState*)params;
788 int const acceleration = (clevel->cLevel < 0) ? -clevel->cLevel : 0;
789 int const cSize = LZ4_compress_fast((const char*)src, (char*)dst + LZ4IO_LEGACY_BLOCK_HEADER_SIZE, (int)srcSize, (int)dstCapacity, acceleration);
790 if (cSize < 0)
791 END_PROCESS(51, "fast compression failed");
792 LZ4IO_writeLE32(dst, (unsigned)cSize);
793 assert(prefixSize == 0); (void)prefixSize;
794 return (size_t) cSize + LZ4IO_LEGACY_BLOCK_HEADER_SIZE;
795 }
796
LZ4IO_compressBlockLegacy_HC(const void * params,void * dst,size_t dstCapacity,const void * src,size_t srcSize,size_t prefixSize)797 static size_t LZ4IO_compressBlockLegacy_HC(
798 const void* params,
799 void* dst,
800 size_t dstCapacity,
801 const void* src,
802 size_t srcSize,
803 size_t prefixSize
804 )
805 {
806 const CompressLegacyState* const cs = (const CompressLegacyState*)params;
807 int const clevel = cs->cLevel;
808 int const cSize = LZ4_compress_HC((const char*)src, (char*)dst + LZ4IO_LEGACY_BLOCK_HEADER_SIZE, (int)srcSize, (int)dstCapacity, clevel);
809 if (cSize < 0)
810 END_PROCESS(52, "HC compression failed");
811 LZ4IO_writeLE32(dst, (unsigned)cSize);
812 assert(prefixSize == 0); (void)prefixSize;
813 return (size_t) cSize + LZ4IO_LEGACY_BLOCK_HEADER_SIZE;
814 }
815
816 /* LZ4IO_compressLegacy_internal :
817 * Implementation of LZ4IO_compressFilename_Legacy.
818 * @return: 0 if success, !0 if error
819 */
LZ4IO_compressLegacy_internal(unsigned long long * readSize,const char * input_filename,const char * output_filename,int compressionlevel,const LZ4IO_prefs_t * prefs)820 static int LZ4IO_compressLegacy_internal(unsigned long long* readSize,
821 const char* input_filename,
822 const char* output_filename,
823 int compressionlevel,
824 const LZ4IO_prefs_t* prefs)
825 {
826 int clResult = 0;
827 compress_f const compressionFunction = (compressionlevel < 3) ? LZ4IO_compressBlockLegacy_fast : LZ4IO_compressBlockLegacy_HC;
828 FILE* const finput = LZ4IO_openSrcFile(input_filename);
829 FILE* foutput = NULL;
830 TPool* const tPool = TPool_create(prefs->nbWorkers, 4);
831 TPool* const wPool = TPool_create(1, 4);
832 WriteRegister wr = WR_init(LEGACY_BLOCKSIZE);
833
834 /* Init & checks */
835 *readSize = 0;
836 if (finput == NULL) {
837 /* read file error : recoverable */
838 clResult = 1;
839 goto _cfl_clean;
840 }
841 foutput = LZ4IO_openDstFile(output_filename, prefs);
842 if (foutput == NULL) {
843 /* write file error : recoverable */
844 clResult = 1;
845 goto _cfl_clean;
846 }
847 if (tPool == NULL || wPool == NULL)
848 END_PROCESS(21, "threadpool creation error ");
849 if (wr.buffers == NULL)
850 END_PROCESS(22, "can't allocate write register");
851
852
853 /* Write Archive Header */
854 { char outHeader[MAGICNUMBER_SIZE];
855 LZ4IO_writeLE32(outHeader, LEGACY_MAGICNUMBER);
856 if (fwrite(outHeader, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE)
857 END_PROCESS(23, "Write error : cannot write header");
858 }
859 wr.totalCSize = MAGICNUMBER_SIZE;
860
861 { CompressLegacyState cls;
862 ReadTracker rjd;
863 cls.cLevel = compressionlevel;
864 rjd.tPool = tPool;
865 rjd.wpool = wPool;
866 rjd.fin = finput;
867 rjd.chunkSize = LEGACY_BLOCKSIZE;
868 rjd.totalReadSize = 0;
869 rjd.blockNb = 0;
870 rjd.xxh32 = NULL;
871 rjd.compress = compressionFunction;
872 rjd.compressParameters = &cls;
873 rjd.prefix = NULL;
874 rjd.fout = foutput;
875 rjd.wr = ≀
876 rjd.maxCBlockSize = (size_t)LZ4_compressBound(LEGACY_BLOCKSIZE) + LZ4IO_LEGACY_BLOCK_HEADER_SIZE;
877 /* Ignite the job chain */
878 TPool_submitJob(tPool, LZ4IO_readAndProcess, &rjd);
879 /* Wait for all completion */
880 TPool_jobsCompleted(tPool);
881 TPool_jobsCompleted(wPool);
882
883 /* Status */
884 DISPLAYLEVEL(2, "\r%79s\r", ""); /* blank line */
885 DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%% \n",
886 rjd.totalReadSize, wr.totalCSize,
887 (double)wr.totalCSize / (double)(rjd.totalReadSize + !rjd.totalReadSize) * 100.);
888 *readSize = rjd.totalReadSize;
889 }
890
891 /* Close & Free */
892 _cfl_clean:
893 WR_destroy(&wr);
894 TPool_free(wPool);
895 TPool_free(tPool);
896 if (finput) fclose(finput);
897 if (foutput && !LZ4IO_isStdout(output_filename)) fclose(foutput); /* do not close stdout */
898
899 return clResult;
900 }
901
902 /* LZ4IO_compressFilename_Legacy :
903 * This function is intentionally "hidden" (not published in .h)
904 * It generates compressed streams using the old 'legacy' format
905 * @return: 0 if success, !0 if error
906 */
LZ4IO_compressFilename_Legacy(const char * input_filename,const char * output_filename,int compressionlevel,const LZ4IO_prefs_t * prefs)907 int LZ4IO_compressFilename_Legacy(const char* input_filename,
908 const char* output_filename,
909 int compressionlevel,
910 const LZ4IO_prefs_t* prefs)
911 {
912 TIME_t const timeStart = TIME_getTime();
913 clock_t const cpuStart = clock();
914 unsigned long long processed = 0;
915 int r = LZ4IO_compressLegacy_internal(&processed, input_filename, output_filename, compressionlevel, prefs);
916 LZ4IO_finalTimeDisplay(timeStart, cpuStart, processed);
917 return r;
918 }
919
920 #define FNSPACE 30
921 /* LZ4IO_compressMultipleFilenames_Legacy :
922 * This function is intentionally "hidden" (not published in .h)
923 * It generates multiple compressed streams using the old 'legacy' format */
LZ4IO_compressMultipleFilenames_Legacy(const char ** inFileNamesTable,int ifntSize,const char * suffix,int compressionLevel,const LZ4IO_prefs_t * prefs)924 int LZ4IO_compressMultipleFilenames_Legacy(
925 const char** inFileNamesTable, int ifntSize,
926 const char* suffix,
927 int compressionLevel, const LZ4IO_prefs_t* prefs)
928 {
929 TIME_t const timeStart = TIME_getTime();
930 clock_t const cpuStart = clock();
931 unsigned long long totalProcessed = 0;
932 int i;
933 int missed_files = 0;
934 char* dstFileName = (char*)malloc(FNSPACE);
935 size_t ofnSize = FNSPACE;
936 const size_t suffixSize = strlen(suffix);
937
938 if (dstFileName == NULL) return ifntSize; /* not enough memory */
939
940 /* loop on each file */
941 for (i=0; i<ifntSize; i++) {
942 unsigned long long processed = 0;
943 size_t const ifnSize = strlen(inFileNamesTable[i]);
944 if (LZ4IO_isStdout(suffix)) {
945 missed_files += LZ4IO_compressLegacy_internal(&processed,
946 inFileNamesTable[i], stdoutmark,
947 compressionLevel, prefs);
948 totalProcessed += processed;
949 continue;
950 }
951
952 if (ofnSize <= ifnSize+suffixSize+1) {
953 free(dstFileName);
954 ofnSize = ifnSize + 20;
955 dstFileName = (char*)malloc(ofnSize);
956 if (dstFileName==NULL) {
957 return ifntSize;
958 } }
959 strcpy(dstFileName, inFileNamesTable[i]);
960 strcat(dstFileName, suffix);
961
962 missed_files += LZ4IO_compressLegacy_internal(&processed,
963 inFileNamesTable[i], dstFileName,
964 compressionLevel, prefs);
965 totalProcessed += processed;
966 }
967
968 /* Close & Free */
969 LZ4IO_finalTimeDisplay(timeStart, cpuStart, totalProcessed);
970 free(dstFileName);
971
972 return missed_files;
973 }
974
975 /*********************************************
976 * Compression using Frame format
977 *********************************************/
/* cRess_t :
 * Set of resources used by frame-format compression.
 * Filled by LZ4IO_createCResources(), released by LZ4IO_freeCResources(). */
typedef struct {
    void* srcBuffer;                  /* malloc'ed input buffer */
    size_t srcBufferSize;             /* capacity of srcBuffer, in bytes */
    void* dstBuffer;                  /* malloc'ed output buffer */
    size_t dstBufferSize;             /* capacity of dstBuffer (an LZ4F_compressFrameBound() value) */
    LZ4F_compressionContext_t ctx;    /* LZ4F compression context */
    LZ4F_preferences_t preparedPrefs; /* frame preferences derived from the I/O preferences */
    LZ4F_CDict* cdict;                /* compression dictionary, NULL when none is requested */
    TPool* tPool;                     /* created on first need by the multi-threaded path, NULL until then */
    TPool* wPool; /* writer thread */
} cRess_t;
989
/* LZ4IO_freeCResources() :
 * Release everything referenced by @ress :
 * thread pools, I/O buffers, dictionary and compression context.
 * @ress is received by value : the caller's copy keeps its (now dangling)
 * pointers and must not be used afterwards.
 * Ends the process (END_PROCESS) if the LZ4F context cannot be freed.
 */
static void LZ4IO_freeCResources(cRess_t ress)
{
    LZ4F_errorCode_t ctxFreeStatus;

    /* thread pools */
    TPool_free(ress.tPool);
    TPool_free(ress.wPool);

    /* I/O buffers */
    free(ress.srcBuffer);
    free(ress.dstBuffer);

    /* dictionary */
    LZ4F_freeCDict(ress.cdict);
    ress.cdict = NULL;   /* note : only affects the local copy */

    /* compression context */
    ctxFreeStatus = LZ4F_freeCompressionContext(ress.ctx);
    if (LZ4F_isError(ctxFreeStatus)) {
        END_PROCESS(35, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(ctxFreeStatus));
    }
}
1004
LZ4IO_createDict(size_t * dictSize,const char * dictFilename)1005 static void* LZ4IO_createDict(size_t* dictSize, const char* dictFilename)
1006 {
1007 size_t readSize;
1008 size_t dictEnd = 0;
1009 size_t dictLen = 0;
1010 size_t dictStart;
1011 size_t circularBufSize = LZ4_MAX_DICT_SIZE;
1012 char* circularBuf = (char*)malloc(circularBufSize);
1013 char* dictBuf;
1014 FILE* dictFile;
1015
1016 if (!dictFilename)
1017 END_PROCESS(26, "Dictionary error : no filename provided");
1018 if (!circularBuf)
1019 END_PROCESS(25, "Allocation error : not enough memory for circular buffer");
1020
1021 dictFile = LZ4IO_openSrcFile(dictFilename);
1022 if (!dictFile)
1023 END_PROCESS(27, "Dictionary error : could not open dictionary file");
1024
1025 /* opportunistically seek to the part of the file we care about.
1026 * If this fails it's not a problem since we'll just read everything anyways. */
1027 if (!LZ4IO_isStdin(dictFilename)) {
1028 (void)UTIL_fseek(dictFile, -LZ4_MAX_DICT_SIZE, SEEK_END);
1029 }
1030
1031 do {
1032 readSize = fread(circularBuf + dictEnd, 1, circularBufSize - dictEnd, dictFile);
1033 dictEnd = (dictEnd + readSize) % circularBufSize;
1034 dictLen += readSize;
1035 } while (readSize>0);
1036
1037 if (dictLen > LZ4_MAX_DICT_SIZE) {
1038 dictLen = LZ4_MAX_DICT_SIZE;
1039 }
1040
1041 *dictSize = dictLen;
1042
1043 dictStart = (circularBufSize + dictEnd - dictLen) % circularBufSize;
1044
1045 if (dictStart == 0) {
1046 /* We're in the simple case where the dict starts at the beginning of our circular buffer. */
1047 dictBuf = circularBuf;
1048 circularBuf = NULL;
1049 } else {
1050 /* Otherwise, we will alloc a new buffer and copy our dict into that. */
1051 dictBuf = (char *)malloc(dictLen ? dictLen : 1);
1052 if (!dictBuf) END_PROCESS(28, "Allocation error : not enough memory");
1053
1054 memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart);
1055 memcpy(dictBuf + circularBufSize - dictStart, circularBuf, dictLen - (circularBufSize - dictStart));
1056 }
1057
1058 fclose(dictFile);
1059 free(circularBuf);
1060
1061 return dictBuf;
1062 }
1063
LZ4IO_createCDict(const LZ4IO_prefs_t * io_prefs)1064 static LZ4F_CDict* LZ4IO_createCDict(const LZ4IO_prefs_t* io_prefs)
1065 {
1066 size_t dictionarySize;
1067 void* dictionaryBuffer;
1068 LZ4F_CDict* cdict;
1069 if (!io_prefs->useDictionary) return NULL;
1070 dictionaryBuffer = LZ4IO_createDict(&dictionarySize, io_prefs->dictionaryFilename);
1071 if (!dictionaryBuffer) END_PROCESS(29, "Dictionary error : could not create dictionary");
1072 cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize);
1073 free(dictionaryBuffer);
1074 return cdict;
1075 }
1076
LZ4IO_createCResources(const LZ4IO_prefs_t * io_prefs)1077 static cRess_t LZ4IO_createCResources(const LZ4IO_prefs_t* io_prefs)
1078 {
1079 const size_t chunkSize = 4 MB;
1080 cRess_t ress;
1081 memset(&ress, 0, sizeof(ress));
1082
1083 /* set compression advanced parameters */
1084 ress.preparedPrefs.autoFlush = 1;
1085 ress.preparedPrefs.frameInfo.blockMode = (LZ4F_blockMode_t)io_prefs->blockIndependence;
1086 ress.preparedPrefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)io_prefs->blockSizeId;
1087 ress.preparedPrefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)io_prefs->blockChecksum;
1088 ress.preparedPrefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)io_prefs->streamChecksum;
1089 ress.preparedPrefs.favorDecSpeed = io_prefs->favorDecSpeed;
1090
1091 /* Allocate compression state */
1092 { LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION);
1093 if (LZ4F_isError(errorCode))
1094 END_PROCESS(30, "Allocation error : can't create LZ4F context : %s", LZ4F_getErrorName(errorCode));
1095 }
1096 assert(ress.ctx != NULL);
1097
1098 /* Allocate Buffers */
1099 ress.srcBuffer = malloc(chunkSize);
1100 ress.srcBufferSize = chunkSize;
1101 ress.dstBufferSize = LZ4F_compressFrameBound(chunkSize, &ress.preparedPrefs);
1102 ress.dstBuffer = malloc(ress.dstBufferSize);
1103 if (!ress.srcBuffer || !ress.dstBuffer)
1104 END_PROCESS(31, "Allocation error : can't allocate buffers");
1105
1106 ress.cdict = LZ4IO_createCDict(io_prefs);
1107
1108 /* will be created it needed */
1109 ress.tPool = NULL;
1110 ress.wPool = NULL;
1111
1112 return ress;
1113 }
1114
/* LZ4IO_CfcParameters :
 * Parameters handed (as an opaque pointer) to LZ4IO_compressFrameChunk(). */
typedef struct {
    const LZ4F_preferences_t* prefs;   /* frame preferences applied to each chunk */
    const LZ4F_CDict* cdict;           /* dictionary used when a chunk has no prefix */
} LZ4IO_CfcParameters;
1119
LZ4IO_compressFrameChunk(const void * params,void * dst,size_t dstCapacity,const void * src,size_t srcSize,size_t prefixSize)1120 static size_t LZ4IO_compressFrameChunk(const void* params,
1121 void* dst, size_t dstCapacity,
1122 const void* src, size_t srcSize,
1123 size_t prefixSize)
1124 {
1125 const LZ4IO_CfcParameters* const cfcp = (const LZ4IO_CfcParameters*)params;
1126 LZ4F_cctx* cctx = NULL;
1127 { LZ4F_errorCode_t const ccr = LZ4F_createCompressionContext(&cctx, LZ4F_VERSION);
1128 if (cctx==NULL || LZ4F_isError(ccr))
1129 END_PROCESS(51, "unable to create a LZ4F compression context");
1130 }
1131 /* init state, and writes frame header, will be overwritten at next stage. */
1132 if (prefixSize) {
1133 size_t const whr = LZ4F_compressBegin_usingDict(cctx, dst, dstCapacity, (const char*)src - prefixSize, prefixSize, cfcp->prefs);
1134 if (LZ4F_isError(whr))
1135 END_PROCESS(52, "error initializing LZ4F compression context with prefix");
1136 assert(prefixSize == 64 KB);
1137 } else {
1138 size_t const whr = LZ4F_compressBegin_usingCDict(cctx, dst, dstCapacity, cfcp->cdict, cfcp->prefs);
1139 if (LZ4F_isError(whr))
1140 END_PROCESS(53, "error initializing LZ4F compression context");
1141 }
1142 /* let's now compress, overwriting unused header */
1143 { size_t const cSize = LZ4F_compressUpdate(cctx, dst, dstCapacity, src, srcSize, NULL);
1144 if (LZ4F_isError(cSize))
1145 END_PROCESS(55, "error compressing with LZ4F_compressUpdate");
1146
1147 LZ4F_freeCompressionContext(cctx);
1148 return (size_t) cSize;
1149 }
1150 }
1151
1152 /*
1153 * LZ4IO_compressFilename_extRess()
1154 * result : 0 : compression completed correctly
1155 * 1 : missing or pb opening srcFileName
1156 */
1157 int
LZ4IO_compressFilename_extRess_MT(unsigned long long * inStreamSize,cRess_t * ress,const char * srcFileName,const char * dstFileName,int compressionLevel,const LZ4IO_prefs_t * const io_prefs)1158 LZ4IO_compressFilename_extRess_MT(unsigned long long* inStreamSize,
1159 cRess_t* ress,
1160 const char* srcFileName, const char* dstFileName,
1161 int compressionLevel,
1162 const LZ4IO_prefs_t* const io_prefs)
1163 {
1164 unsigned long long filesize = 0;
1165 unsigned long long compressedfilesize = 0;
1166 FILE* dstFile;
1167 void* const srcBuffer = ress->srcBuffer;
1168 void* const dstBuffer = ress->dstBuffer;
1169 const size_t dstBufferSize = ress->dstBufferSize;
1170 const size_t chunkSize = 4 MB; /* each job should be "sufficiently large" */
1171 size_t readSize;
1172 LZ4F_compressionContext_t ctx = ress->ctx; /* just a pointer */
1173 LZ4F_preferences_t prefs;
1174
1175 /* Init */
1176 FILE* const srcFile = LZ4IO_openSrcFile(srcFileName);
1177 if (srcFile == NULL) return 1;
1178 dstFile = LZ4IO_openDstFile(dstFileName, io_prefs);
1179 if (dstFile == NULL) { fclose(srcFile); return 1; }
1180
1181 /* Adjust compression parameters */
1182 prefs = ress->preparedPrefs;
1183 prefs.compressionLevel = compressionLevel;
1184 if (io_prefs->contentSizeFlag) {
1185 U64 const fileSize = UTIL_getOpenFileSize(srcFile);
1186 prefs.frameInfo.contentSize = fileSize; /* == 0 if input == stdin */
1187 if (fileSize==0)
1188 DISPLAYLEVEL(3, "Warning : cannot determine input content size \n");
1189 }
1190
1191 /* read first chunk */
1192 assert(chunkSize <= ress->srcBufferSize);
1193 readSize = fread(srcBuffer, (size_t)1, chunkSize, srcFile);
1194 if (ferror(srcFile))
1195 END_PROCESS(40, "Error reading first chunk (%u bytes) of '%s' ", (unsigned)chunkSize, srcFileName);
1196 filesize += readSize;
1197
1198 /* single-block file */
1199 if (readSize < chunkSize) {
1200 /* Compress in single pass */
1201 size_t const cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress->cdict, &prefs);
1202 if (LZ4F_isError(cSize))
1203 END_PROCESS(41, "Compression failed : %s", LZ4F_getErrorName(cSize));
1204 compressedfilesize = cSize;
1205 DISPLAYUPDATE(2, "\rRead : %u MiB ==> %.2f%% ",
1206 (unsigned)(filesize>>20), (double)compressedfilesize/(double)(filesize+!filesize)*100); /* avoid division by zero */
1207
1208 /* Write Block */
1209 if (fwrite(dstBuffer, 1, cSize, dstFile) != cSize) {
1210 END_PROCESS(42, "Write error : failed writing single-block compressed frame");
1211 } }
1212
1213 else
1214
1215 /* multiple-blocks file */
1216 { WriteRegister wr = WR_init(chunkSize);
1217 void* prefixBuffer = NULL;
1218
1219 int checksum = (int)prefs.frameInfo.contentChecksumFlag;
1220 XXH32_state_t* xxh32 = NULL;
1221
1222 LZ4IO_CfcParameters cfcp;
1223 ReadTracker rjd;
1224
1225 if (ress->tPool == NULL) {
1226 ress->tPool = TPool_create(io_prefs->nbWorkers, 4);
1227 assert(ress->wPool == NULL);
1228 ress->wPool = TPool_create(1, 4);
1229 if (ress->tPool == NULL || ress->wPool == NULL)
1230 END_PROCESS(43, "can't create threadpools");
1231 }
1232 cfcp.prefs = &prefs;
1233 cfcp.cdict = ress->cdict;
1234 rjd.tPool = ress->tPool;
1235 rjd.wpool = ress->wPool;
1236 rjd.fin = srcFile;
1237 rjd.chunkSize = chunkSize;
1238 rjd.totalReadSize = 0;
1239 rjd.blockNb = 0;
1240 rjd.xxh32 = xxh32;
1241 rjd.compress = LZ4IO_compressFrameChunk;
1242 rjd.compressParameters = &cfcp;
1243 rjd.prefix = NULL;
1244 rjd.fout = dstFile;
1245 rjd.wr = ≀
1246 rjd.maxCBlockSize = LZ4F_compressFrameBound(chunkSize, &prefs);
1247
1248 /* process frame checksum externally */
1249 if (checksum) {
1250 xxh32 = XXH32_createState();
1251 if (xxh32==NULL)
1252 END_PROCESS(42, "could not init checksum");
1253 XXH32_reset(xxh32, 0);
1254 XXH32_update(xxh32, srcBuffer, readSize);
1255 rjd.xxh32 = xxh32;
1256 }
1257
1258 /* block dependency */
1259 if (prefs.frameInfo.blockMode == LZ4F_blockLinked) {
1260 prefixBuffer = malloc(64 KB);
1261 if (prefixBuffer==NULL)
1262 END_PROCESS(43, "cannot allocate small dictionary buffer");
1263 rjd.prefix = prefixBuffer;
1264 }
1265
1266 /* Write Frame Header */
1267 /* note: simplification: do not employ dictionary when input size >= 4 MB,
1268 * the benefit is very limited anyway, and is not worth the dependency cost */
1269 { size_t const headerSize = LZ4F_compressBegin(ctx, dstBuffer, dstBufferSize, &prefs);
1270 if (LZ4F_isError(headerSize))
1271 END_PROCESS(44, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
1272 if (fwrite(dstBuffer, 1, headerSize, dstFile) != headerSize)
1273 END_PROCESS(45, "Write error : cannot write header");
1274 compressedfilesize = headerSize;
1275 }
1276 /* avoid duplicating effort to process content checksum (done externally) */
1277 prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum;
1278
1279 /* process first block */
1280 { CompressJobDesc cjd;
1281 cjd.wpool = ress->wPool;
1282 cjd.buffer = srcBuffer;
1283 cjd.prefixSize = 0;
1284 cjd.inSize = readSize;
1285 cjd.blockNb = 0;
1286 cjd.compress = LZ4IO_compressFrameChunk;
1287 cjd.compressParameters = &cfcp;
1288 cjd.fout = dstFile;
1289 cjd.wr = ≀
1290 cjd.maxCBlockSize = rjd.maxCBlockSize;
1291 cjd.lastBlock = 0;
1292 TPool_submitJob(ress->tPool, LZ4IO_compressChunk, &cjd);
1293 rjd.totalReadSize = readSize;
1294 rjd.blockNb = 1;
1295 if (prefixBuffer) {
1296 assert(readSize >= 64 KB);
1297 memcpy(prefixBuffer, (char*)srcBuffer + readSize - 64 KB, 64 KB);
1298 }
1299
1300 /* Start the job chain */
1301 TPool_submitJob(ress->tPool, LZ4IO_readAndProcess, &rjd);
1302
1303 /* Wait for all completion */
1304 TPool_jobsCompleted(ress->tPool);
1305 TPool_jobsCompleted(ress->wPool);
1306 compressedfilesize += wr.totalCSize;
1307 }
1308
1309 /* End of Frame mark */
1310 { size_t endSize = 4;
1311 assert(dstBufferSize >= 8);
1312 memset(dstBuffer, 0, 4);
1313 if (checksum) {
1314 /* handle frame checksum externally
1315 * note: LZ4F_compressEnd already wrote a (bogus) checksum */
1316 U32 const crc = XXH32_digest(xxh32);
1317 LZ4IO_writeLE32( (char*)dstBuffer + 4, crc);
1318 endSize = 8;
1319 }
1320 if (fwrite(dstBuffer, 1, endSize, dstFile) != endSize)
1321 END_PROCESS(49, "Write error : cannot write end of frame");
1322 compressedfilesize += endSize;
1323 filesize = rjd.totalReadSize;
1324 }
1325
1326 /* clean up*/
1327 free(prefixBuffer);
1328 XXH32_freeState(xxh32);
1329 WR_destroy(&wr);
1330 }
1331
1332 /* Release file handlers */
1333 fclose (srcFile);
1334 if (!LZ4IO_isStdout(dstFileName)) fclose(dstFile); /* do not close stdout */
1335
1336 /* Copy owner, file permissions and modification time */
1337 { stat_t statbuf;
1338 if (!LZ4IO_isStdin(srcFileName)
1339 && !LZ4IO_isStdout(dstFileName)
1340 && !LZ4IO_isDevNull(dstFileName)
1341 && UTIL_getFileStat(srcFileName, &statbuf)) {
1342 UTIL_setFileStat(dstFileName, &statbuf);
1343 } }
1344
1345 if (io_prefs->removeSrcFile) { /* remove source file : --rm */
1346 if (remove(srcFileName))
1347 END_PROCESS(50, "Remove error : %s: %s", srcFileName, strerror(errno));
1348 }
1349
1350 /* Final Status */
1351 DISPLAYLEVEL(2, "\r%79s\r", "");
1352 DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
1353 filesize, compressedfilesize,
1354 (double)compressedfilesize / (double)(filesize + !filesize /* avoid division by zero */ ) * 100.);
1355 *inStreamSize = filesize;
1356
1357 return 0;
1358 }
1359
1360 /*
1361 * LZ4IO_compressFilename_extRess()
1362 * result : 0 : compression completed correctly
1363 * 1 : missing or pb opening srcFileName
1364 */
1365 int
LZ4IO_compressFilename_extRess_ST(unsigned long long * inStreamSize,const cRess_t * ress,const char * srcFileName,const char * dstFileName,int compressionLevel,const LZ4IO_prefs_t * const io_prefs)1366 LZ4IO_compressFilename_extRess_ST(unsigned long long* inStreamSize,
1367 const cRess_t* ress,
1368 const char* srcFileName, const char* dstFileName,
1369 int compressionLevel,
1370 const LZ4IO_prefs_t* const io_prefs)
1371 {
1372 unsigned long long filesize = 0;
1373 unsigned long long compressedfilesize = 0;
1374 FILE* dstFile;
1375 void* const srcBuffer = ress->srcBuffer;
1376 void* const dstBuffer = ress->dstBuffer;
1377 const size_t dstBufferSize = ress->dstBufferSize;
1378 const size_t blockSize = io_prefs->blockSize;
1379 size_t readSize;
1380 LZ4F_compressionContext_t ctx = ress->ctx; /* just a pointer */
1381 LZ4F_preferences_t prefs;
1382
1383 /* Init */
1384 FILE* const srcFile = LZ4IO_openSrcFile(srcFileName);
1385 if (srcFile == NULL) return 1;
1386 dstFile = LZ4IO_openDstFile(dstFileName, io_prefs);
1387 if (dstFile == NULL) { fclose(srcFile); return 1; }
1388 memset(&prefs, 0, sizeof(prefs));
1389
1390 /* Adjust compression parameters */
1391 prefs = ress->preparedPrefs;
1392 prefs.compressionLevel = compressionLevel;
1393 if (io_prefs->contentSizeFlag) {
1394 U64 const fileSize = UTIL_getOpenFileSize(srcFile);
1395 prefs.frameInfo.contentSize = fileSize; /* == 0 if input == stdin */
1396 if (fileSize==0)
1397 DISPLAYLEVEL(3, "Warning : cannot determine input content size \n");
1398 }
1399
1400 /* read first block */
1401 readSize = fread(srcBuffer, (size_t)1, blockSize, srcFile);
1402 if (ferror(srcFile)) END_PROCESS(40, "Error reading %s ", srcFileName);
1403 filesize += readSize;
1404
1405 /* single-block file */
1406 if (readSize < blockSize) {
1407 /* Compress in single pass */
1408 size_t const cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress->cdict, &prefs);
1409 if (LZ4F_isError(cSize))
1410 END_PROCESS(41, "Compression failed : %s", LZ4F_getErrorName(cSize));
1411 compressedfilesize = cSize;
1412 DISPLAYUPDATE(2, "\rRead : %u MiB ==> %.2f%% ",
1413 (unsigned)(filesize>>20), (double)compressedfilesize/(double)(filesize+!filesize)*100); /* avoid division by zero */
1414
1415 /* Write Block */
1416 if (fwrite(dstBuffer, 1, cSize, dstFile) != cSize) {
1417 END_PROCESS(42, "Write error : failed writing single-block compressed frame");
1418 } }
1419
1420 else
1421
1422 /* multiple-blocks file */
1423 {
1424 /* Write Frame Header */
1425 size_t const headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress->cdict, &prefs);
1426 if (LZ4F_isError(headerSize))
1427 END_PROCESS(43, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
1428 if (fwrite(dstBuffer, 1, headerSize, dstFile) != headerSize)
1429 END_PROCESS(44, "Write error : cannot write header");
1430 compressedfilesize += headerSize;
1431
1432 /* Main Loop - one block at a time */
1433 while (readSize>0) {
1434 size_t const outSize = LZ4F_compressUpdate(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, NULL);
1435 if (LZ4F_isError(outSize))
1436 END_PROCESS(45, "Compression failed : %s", LZ4F_getErrorName(outSize));
1437 compressedfilesize += outSize;
1438 DISPLAYUPDATE(2, "\rRead : %u MiB ==> %.2f%% ",
1439 (unsigned)(filesize>>20),
1440 (double)compressedfilesize / (double)filesize * 100.);
1441
1442 /* Write Block */
1443 if (fwrite(dstBuffer, 1, outSize, dstFile) != outSize)
1444 END_PROCESS(46, "Write error : cannot write compressed block");
1445
1446 /* Read next block */
1447 readSize = fread(srcBuffer, (size_t)1, (size_t)blockSize, srcFile);
1448 filesize += readSize;
1449 }
1450 if (ferror(srcFile)) END_PROCESS(47, "Error reading %s ", srcFileName);
1451
1452 /* End of Frame mark */
1453 { size_t const endSize = LZ4F_compressEnd(ctx, dstBuffer, dstBufferSize, NULL);
1454 if (LZ4F_isError(endSize))
1455 END_PROCESS(48, "End of frame error : %s", LZ4F_getErrorName(endSize));
1456 if (fwrite(dstBuffer, 1, endSize, dstFile) != endSize)
1457 END_PROCESS(49, "Write error : cannot write end of frame");
1458 compressedfilesize += endSize;
1459 }
1460 }
1461
1462 /* Release file handlers */
1463 fclose (srcFile);
1464 if (!LZ4IO_isStdout(dstFileName)) fclose(dstFile); /* do not close stdout */
1465
1466 /* Copy owner, file permissions and modification time */
1467 { stat_t statbuf;
1468 if (!LZ4IO_isStdin(srcFileName)
1469 && !LZ4IO_isStdout(dstFileName)
1470 && !LZ4IO_isDevNull(dstFileName)
1471 && UTIL_getFileStat(srcFileName, &statbuf)) {
1472 UTIL_setFileStat(dstFileName, &statbuf);
1473 } }
1474
1475 if (io_prefs->removeSrcFile) { /* remove source file : --rm */
1476 if (remove(srcFileName))
1477 END_PROCESS(50, "Remove error : %s: %s", srcFileName, strerror(errno));
1478 }
1479
1480 /* Final Status */
1481 DISPLAYLEVEL(2, "\r%79s\r", "");
1482 DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
1483 filesize, compressedfilesize,
1484 (double)compressedfilesize / (double)(filesize + !filesize /* avoid division by zero */ ) * 100.);
1485 *inStreamSize = filesize;
1486
1487 return 0;
1488 }
1489
1490 static int
LZ4IO_compressFilename_extRess(unsigned long long * inStreamSize,cRess_t * ress,const char * srcFileName,const char * dstFileName,int compressionLevel,const LZ4IO_prefs_t * const io_prefs)1491 LZ4IO_compressFilename_extRess(unsigned long long* inStreamSize,
1492 cRess_t* ress,
1493 const char* srcFileName, const char* dstFileName,
1494 int compressionLevel,
1495 const LZ4IO_prefs_t* const io_prefs)
1496 {
1497 if (LZ4IO_MULTITHREAD)
1498 return LZ4IO_compressFilename_extRess_MT(inStreamSize, ress, srcFileName, dstFileName, compressionLevel, io_prefs);
1499 /* Only single-thread available */
1500 return LZ4IO_compressFilename_extRess_ST(inStreamSize, ress, srcFileName, dstFileName, compressionLevel, io_prefs);
1501 }
1502
LZ4IO_compressFilename(const char * srcFileName,const char * dstFileName,int compressionLevel,const LZ4IO_prefs_t * prefs)1503 int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel, const LZ4IO_prefs_t* prefs)
1504 {
1505 TIME_t const timeStart = TIME_getTime();
1506 clock_t const cpuStart = clock();
1507 cRess_t ress = LZ4IO_createCResources(prefs);
1508 unsigned long long processed;
1509
1510 int const result = LZ4IO_compressFilename_extRess(&processed, &ress, srcFileName, dstFileName, compressionLevel, prefs);
1511
1512 /* Free resources */
1513 LZ4IO_freeCResources(ress);
1514
1515 /* Final Status */
1516 LZ4IO_finalTimeDisplay(timeStart, cpuStart, processed);
1517
1518 return result;
1519 }
1520
LZ4IO_compressMultipleFilenames(const char ** inFileNamesTable,int ifntSize,const char * suffix,int compressionLevel,const LZ4IO_prefs_t * prefs)1521 int LZ4IO_compressMultipleFilenames(
1522 const char** inFileNamesTable, int ifntSize,
1523 const char* suffix,
1524 int compressionLevel,
1525 const LZ4IO_prefs_t* prefs)
1526 {
1527 int i;
1528 int missed_files = 0;
1529 char* dstFileName = (char*)malloc(FNSPACE);
1530 size_t ofnSize = FNSPACE;
1531 const size_t suffixSize = strlen(suffix);
1532 cRess_t ress;
1533 unsigned long long totalProcessed = 0;
1534 TIME_t timeStart = TIME_getTime();
1535 clock_t cpuStart = clock();
1536
1537 if (dstFileName == NULL) return ifntSize; /* not enough memory */
1538 ress = LZ4IO_createCResources(prefs);
1539
1540 /* loop on each file */
1541 for (i=0; i<ifntSize; i++) {
1542 unsigned long long processed;
1543 size_t const ifnSize = strlen(inFileNamesTable[i]);
1544 if (LZ4IO_isStdout(suffix)) {
1545 missed_files += LZ4IO_compressFilename_extRess(&processed, &ress,
1546 inFileNamesTable[i], stdoutmark,
1547 compressionLevel, prefs);
1548 totalProcessed += processed;
1549 continue;
1550 }
1551 /* suffix != stdout => compress into a file => generate its name */
1552 if (ofnSize <= ifnSize+suffixSize+1) {
1553 free(dstFileName);
1554 ofnSize = ifnSize + 20;
1555 dstFileName = (char*)malloc(ofnSize);
1556 if (dstFileName==NULL) {
1557 LZ4IO_freeCResources(ress);
1558 return ifntSize;
1559 } }
1560 strcpy(dstFileName, inFileNamesTable[i]);
1561 strcat(dstFileName, suffix);
1562
1563 missed_files += LZ4IO_compressFilename_extRess(&processed, &ress,
1564 inFileNamesTable[i], dstFileName,
1565 compressionLevel, prefs);
1566 totalProcessed += processed;
1567 }
1568
1569 /* Close & Free */
1570 LZ4IO_freeCResources(ress);
1571 free(dstFileName);
1572 LZ4IO_finalTimeDisplay(timeStart, cpuStart, totalProcessed);
1573
1574 return missed_files;
1575 }
1576
1577
1578 /* ********************************************************************* */
1579 /* ********************** LZ4 file-stream Decompression **************** */
1580 /* ********************************************************************* */
1581
/* LZ4IO_readLE32() :
 * Assemble a 32-bit value from 4 bytes stored in little-endian order.
 * Bytes are read one at a time, so the result depends neither on
 * host endianness nor on the alignment of @p.
 * @p is presumed to point to a memory space of size >= 4.
 */
static unsigned LZ4IO_readLE32 (const void* p)
{
    const unsigned char* const bytes = (const unsigned char*)p;
    return  (unsigned)bytes[0]
         | ((unsigned)bytes[1] <<  8)
         | ((unsigned)bytes[2] << 16)
         | ((unsigned)bytes[3] << 24);
}
1592
1593
/* LZ4IO_fwriteSparse() :
 * Write @bufferSize bytes from @buffer into @file.
 * In sparse mode, leading runs of zeroes are not written : they are
 * accumulated as "skips", later converted into a forward seek just before
 * the next non-zero data gets written.
 * Sparse mode is active when (sparseFileSupport - (file==stdout)) > 0.
 * @storedSkips : nb of zero bytes still pending from previous calls
 * @return : nb of zero bytes pending after this call (always 0 in normal mode).
 *           The caller is expected to pass it to the next call.
 * Any seek or write failure ends the process through END_PROCESS().
 */
static unsigned
LZ4IO_fwriteSparse(FILE* file,
                   const void* buffer, size_t bufferSize,
                   int sparseFileSupport,
                   unsigned storedSkips)
{
    const size_t sizeT = sizeof(size_t);
    const size_t maskT = sizeT -1 ;
    const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
    const size_t* ptrT = bufferT;
    size_t bufferSizeT = bufferSize / sizeT;   /* nb of whole size_t words left to scan */
    const size_t* const bufferTEnd = bufferT + bufferSizeT;
    const size_t segmentSizeT = (32 KB) / sizeT;   /* buffer is scanned by segments of 32 KB */
    int const sparseMode = (sparseFileSupport - (file==stdout)) > 0;

    if (!sparseMode) {  /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize) END_PROCESS(70, "Write error : cannot write decoded block");
        return 0;
    }

    /* avoid int overflow */
    if (storedSkips > 1 GB) {
        /* materialize 1 GB of pending skips right away, keep the rest pending */
        int const seekResult = UTIL_fseek(file, 1 GB, SEEK_CUR);
        if (seekResult != 0) END_PROCESS(71, "1 GB skip error (sparse file support)");
        storedSkips -= 1 GB;
    }

    /* scan whole words, one segment at a time */
    while (ptrT < bufferTEnd) {
        size_t seg0SizeT = segmentSizeT;
        size_t nb0T;

        /* count leading zeros */
        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;   /* last segment may be shorter */
        bufferSizeT -= seg0SizeT;
        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
        storedSkips += (unsigned)(nb0T * sizeT);

        if (nb0T != seg0SizeT) {   /* not all 0s */
            /* skip over all pending zeroes, then write the rest of the segment as is */
            errno = 0;
            {   int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
                if (seekResult) END_PROCESS(72, "Sparse skip error(%d): %s ; try --no-sparse", (int)errno, strerror(errno));
            }
            storedSkips = 0;
            seg0SizeT -= nb0T;
            ptrT += nb0T;
            {   size_t const sizeCheck = fwrite(ptrT, sizeT, seg0SizeT, file);
                if (sizeCheck != seg0SizeT) END_PROCESS(73, "Write error : cannot write decoded block");
        }   }
        /* move to next segment : advances by the written part, or by the all-zero segment */
        ptrT += seg0SizeT;
    }

    /* trailing bytes, which do not fill a whole word, are handled one byte at a time */
    if (bufferSize & maskT) {  /* size not multiple of sizeT : implies end of block */
        const char* const restStart = (const char*)bufferTEnd;
        const char* restPtr = restStart;
        size_t const restSize =  bufferSize & maskT;
        const char* const restEnd = restStart + restSize;
        for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
        storedSkips += (unsigned) (restPtr - restStart);
        if (restPtr != restEnd) {
            int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
            if (seekResult) END_PROCESS(74, "Sparse skip error ; try --no-sparse");
            storedSkips = 0;
            {   size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
                if (sizeCheck != (size_t)(restEnd - restPtr)) END_PROCESS(75, "Write error : cannot write decoded end of block");
        }   }
    }

    return storedSkips;
}
1664
/* LZ4IO_fwriteSparseEnd() :
 * Flush zero bytes still pending after the last sparse write :
 * seek forward over all of them but one, then write a single zero byte.
 * Does nothing when @storedSkips == 0.
 * A seek or write failure ends the process through END_PROCESS().
 */
static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
{
    const char zeroByte[1] = { 0 };

    if (storedSkips == 0) return;   /* nothing pending */

    /* from here : storedSkips > 0, which implies sparseFileSupport>0 */
    if (UTIL_fseek(file, storedSkips-1, SEEK_CUR) != 0) {
        END_PROCESS(69, "Final skip error (sparse file)\n");
    }
    if (fwrite(zeroByte, 1, 1, file) != 1) {
        END_PROCESS(69, "Write error : cannot write last zero\n");
    }
}
1675
1676
1677 static unsigned g_magicRead = 0; /* out-parameter of LZ4IO_decodeLegacyStream() */
1678
1679 #if LZ4IO_MULTITHREAD
1680
/* ChunkToWrite :
 * Description of one write job, consumed (and freed) by LZ4IO_writeDecodedChunk(). */
typedef struct {
    void* buffer;                          /* decoded data to write */
    size_t size;                           /* nb of bytes to write from buffer */
    FILE* f;                               /* destination */
    int sparseEnable;                      /* forwarded as sparseFileSupport to LZ4IO_fwriteSparse() */
    unsigned* storedSkips;                 /* pending sparse skips, read then updated by the write job */
    const unsigned long long* totalSize;   /* running total, only read for progress display */
} ChunkToWrite;
1689
LZ4IO_writeDecodedChunk(void * arg)1690 static void LZ4IO_writeDecodedChunk(void* arg)
1691 {
1692 ChunkToWrite* const ctw = (ChunkToWrite*)arg;
1693 assert(ctw != NULL);
1694
1695 /* note: works because only 1 thread */
1696 *ctw->storedSkips = LZ4IO_fwriteSparse(ctw->f, ctw->buffer, ctw->size, ctw->sparseEnable, *ctw->storedSkips); /* success or die */
1697 DISPLAYUPDATE(2, "\rDecompressed : %u MiB ", (unsigned)(ctw->totalSize[0] >>20));
1698
1699 /* clean up */
1700 free(ctw);
1701 }
1702
/* LegacyBlockInput :
 * Description of one decoding job, consumed (and freed) by LZ4IO_decompressBlockLegacy(). */
typedef struct {
    void* inBuffer;                  /* compressed block */
    size_t inSize;                   /* size of the compressed block, in bytes */
    void* outBuffer;                 /* destination of decoded data (decoded with a LEGACY_BLOCKSIZE capacity) */
    unsigned long long* totalSize;   /* running total of decoded bytes, incremented by the decoding job */
    TPool* wPool;                    /* pool receiving the follow-up write job */
    FILE* foutput;                   /* forwarded to the write job */
    int sparseEnable;                /* forwarded to the write job */
    unsigned* storedSkips;           /* forwarded to the write job */
} LegacyBlockInput;
1713
LZ4IO_decompressBlockLegacy(void * arg)1714 static void LZ4IO_decompressBlockLegacy(void* arg)
1715 {
1716 int decodedSize;
1717 LegacyBlockInput* const lbi = (LegacyBlockInput*)arg;
1718
1719 decodedSize = LZ4_decompress_safe((const char*)lbi->inBuffer, (char*)lbi->outBuffer, (int)lbi->inSize, LEGACY_BLOCKSIZE);
1720 if (decodedSize < 0) END_PROCESS(64, "Decoding Failed ! Corrupted input detected !");
1721 *lbi->totalSize += (unsigned long long)decodedSize; /* note: works because only 1 thread */
1722
1723 /* push to write thread */
1724 { ChunkToWrite* const ctw = (ChunkToWrite*)malloc(sizeof(*ctw));
1725 if (ctw==NULL) {
1726 END_PROCESS(33, "Allocation error : can't describe new write job");
1727 }
1728 ctw->buffer = lbi->outBuffer;
1729 ctw->size = (size_t)decodedSize;
1730 ctw->f = lbi->foutput;
1731 ctw->sparseEnable = lbi->sparseEnable;
1732 ctw->storedSkips = lbi->storedSkips;
1733 ctw->totalSize = lbi->totalSize;
1734 TPool_submitJob(lbi->wPool, LZ4IO_writeDecodedChunk, ctw);
1735 }
1736
1737 /* clean up */
1738 free(lbi);
1739 }
1740
1741 static unsigned long long
LZ4IO_decodeLegacyStream(FILE * finput,FILE * foutput,const LZ4IO_prefs_t * prefs)1742 LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput, const LZ4IO_prefs_t* prefs)
1743 {
1744 unsigned long long streamSize = 0;
1745 unsigned storedSkips = 0;
1746
1747 TPool* const tPool = TPool_create(1, 1);
1748 TPool* const wPool = TPool_create(1, 1);
1749 #define NB_BUFFSETS 4 /* 1 being read, 1 being processed, 1 being written, 1 being queued */
1750 void* inBuffs[NB_BUFFSETS];
1751 void* outBuffs[NB_BUFFSETS];
1752 int bSetNb;
1753
1754 if (tPool == NULL || wPool == NULL)
1755 END_PROCESS(21, "threadpool creation error ");
1756
1757 /* allocate buffers up front */
1758 for (bSetNb=0; bSetNb<NB_BUFFSETS; bSetNb++) {
1759 inBuffs[bSetNb] = malloc((size_t)LZ4_compressBound(LEGACY_BLOCKSIZE));
1760 outBuffs[bSetNb] = malloc(LEGACY_BLOCKSIZE);
1761 if (!inBuffs[bSetNb] || !outBuffs[bSetNb])
1762 END_PROCESS(31, "Allocation error : can't allocate buffer for legacy decoding");
1763 }
1764
1765 /* Main Loop */
1766 for (bSetNb = 0;;bSetNb = (bSetNb+1) % NB_BUFFSETS) {
1767 char header[LZ4IO_LEGACY_BLOCK_HEADER_SIZE];
1768 unsigned int blockSize;
1769
1770 /* Block Size */
1771 { size_t const sizeCheck = fread(header, 1, LZ4IO_LEGACY_BLOCK_HEADER_SIZE, finput);
1772 if (sizeCheck == 0) break; /* Nothing to read : file read is completed */
1773 if (sizeCheck != LZ4IO_LEGACY_BLOCK_HEADER_SIZE)
1774 END_PROCESS(61, "Error: cannot read block size in Legacy format");
1775 }
1776 blockSize = LZ4IO_readLE32(header); /* Convert to Little Endian */
1777 if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE)) {
1778 /* Cannot read next block : maybe new stream ? */
1779 g_magicRead = blockSize;
1780 break;
1781 }
1782
1783 /* Read Block */
1784 { size_t const sizeCheck = fread(inBuffs[bSetNb], 1, blockSize, finput);
1785 if (sizeCheck != blockSize)
1786 END_PROCESS(63, "Read error : cannot access compressed block !");
1787 /* push to decoding thread */
1788 { LegacyBlockInput* const lbi = (LegacyBlockInput*)malloc(sizeof(*lbi));
1789 if (lbi==NULL)
1790 END_PROCESS(64, "Allocation error : not enough memory to allocate job descriptor");
1791 lbi->inBuffer = inBuffs[bSetNb];
1792 lbi->inSize = blockSize;
1793 lbi->outBuffer = outBuffs[bSetNb];
1794 lbi->wPool = wPool;
1795 lbi->totalSize = &streamSize;
1796 lbi->foutput = foutput;
1797 lbi->sparseEnable = prefs->sparseFileSupport;
1798 lbi->storedSkips = &storedSkips;
1799 TPool_submitJob(tPool, LZ4IO_decompressBlockLegacy, lbi);
1800 }
1801 }
1802 }
1803 if (ferror(finput)) END_PROCESS(65, "Read error : ferror");
1804
1805 /* Wait for all completion */
1806 TPool_jobsCompleted(tPool);
1807 TPool_jobsCompleted(wPool);
1808
1809 /* flush last zeroes */
1810 LZ4IO_fwriteSparseEnd(foutput, storedSkips);
1811
1812 /* Free */
1813 TPool_free(wPool);
1814 TPool_free(tPool);
1815 for (bSetNb=0; bSetNb<NB_BUFFSETS; bSetNb++) {
1816 free(inBuffs[bSetNb]);
1817 free(outBuffs[bSetNb]);
1818 }
1819
1820 return streamSize;
1821 }
1822
1823 #else
1824
1825 static unsigned long long
LZ4IO_decodeLegacyStream(FILE * finput,FILE * foutput,const LZ4IO_prefs_t * prefs)1826 LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput, const LZ4IO_prefs_t* prefs)
1827 {
1828 unsigned long long streamSize = 0;
1829 unsigned storedSkips = 0;
1830
1831 /* Allocate Memory */
1832 char* const in_buff = (char*)malloc((size_t)LZ4_compressBound(LEGACY_BLOCKSIZE));
1833 char* const out_buff = (char*)malloc(LEGACY_BLOCKSIZE);
1834 if (!in_buff || !out_buff) END_PROCESS(61, "Allocation error : not enough memory");
1835
1836 /* Main Loop */
1837 while (1) {
1838 unsigned int blockSize;
1839
1840 /* Block Size */
1841 { size_t const sizeCheck = fread(in_buff, 1, LZ4IO_LEGACY_BLOCK_HEADER_SIZE, finput);
1842 if (sizeCheck == 0) break; /* Nothing to read : file read is completed */
1843 if (sizeCheck != LZ4IO_LEGACY_BLOCK_HEADER_SIZE)
1844 END_PROCESS(62, "Error: cannot read block size in Legacy format");
1845 }
1846 blockSize = LZ4IO_readLE32(in_buff); /* Convert to Little Endian */
1847 if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE)) {
1848 /* Cannot read next block : maybe new stream ? */
1849 g_magicRead = blockSize;
1850 break;
1851 }
1852
1853 /* Read Block */
1854 { size_t const sizeCheck = fread(in_buff, 1, blockSize, finput);
1855 if (sizeCheck != blockSize) END_PROCESS(63, "Read error : cannot access compressed block !"); }
1856
1857 /* Decode Block */
1858 { int const decodeSize = LZ4_decompress_safe(in_buff, out_buff, (int)blockSize, LEGACY_BLOCKSIZE);
1859 if (decodeSize < 0) END_PROCESS(64, "Decoding Failed ! Corrupted input detected !");
1860 streamSize += (unsigned long long)decodeSize;
1861 /* Write Block */
1862 storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, (size_t)decodeSize, prefs->sparseFileSupport, storedSkips); /* success or die */
1863 } }
1864 if (ferror(finput)) END_PROCESS(65, "Read error : ferror");
1865
1866 LZ4IO_fwriteSparseEnd(foutput, storedSkips);
1867
1868 /* Free */
1869 free(in_buff);
1870 free(out_buff);
1871
1872 return streamSize;
1873 }
1874 #endif
1875
1876
/* dRess_t:
 * resources shared by decompression functions.
 * Created by LZ4IO_createDResources(), released by LZ4IO_freeDResources().
 * The structure is passed by value; dstFile is filled in by callers before decoding. */
typedef struct {
    void* srcBuffer;                   /* input staging buffer */
    size_t srcBufferSize;              /* capacity of srcBuffer, in bytes */
    void* dstBuffer;                   /* output staging buffer */
    size_t dstBufferSize;              /* capacity of dstBuffer, in bytes */
    FILE* dstFile;                     /* destination; NULL right after creation */
    LZ4F_decompressionContext_t dCtx;  /* LZ4F decompression context */
    void* dictBuffer;                  /* dictionary content; NULL when no dictionary is used */
    size_t dictBufferSize;             /* size of dictBuffer; 0 when no dictionary is used */
} dRess_t;
1887
/* LZ4IO_loadDDict() :
 * fill the dictionary fields of @ress according to @prefs.
 * Without dictionary, fields are set to NULL / 0.
 * A dictionary that can't be created terminates the process. */
static void LZ4IO_loadDDict(dRess_t* ress, const LZ4IO_prefs_t* const prefs)
{
    if (prefs->useDictionary) {
        ress->dictBuffer = LZ4IO_createDict(&ress->dictBufferSize, prefs->dictionaryFilename);
        if (ress->dictBuffer == NULL) {
            END_PROCESS(25, "Dictionary error : could not create dictionary");
        }
        return;
    }

    /* no dictionary requested */
    ress->dictBuffer = NULL;
    ress->dictBufferSize = 0;
}
1899
1900 static const size_t LZ4IO_dBufferSize = 64 KB;
LZ4IO_createDResources(const LZ4IO_prefs_t * const prefs)1901 static dRess_t LZ4IO_createDResources(const LZ4IO_prefs_t* const prefs)
1902 {
1903 dRess_t ress;
1904
1905 /* init */
1906 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&ress.dCtx, LZ4F_VERSION);
1907 if (LZ4F_isError(errorCode)) END_PROCESS(60, "Can't create LZ4F context : %s", LZ4F_getErrorName(errorCode));
1908
1909 /* Allocate Memory */
1910 ress.srcBufferSize = LZ4IO_dBufferSize;
1911 ress.srcBuffer = malloc(ress.srcBufferSize);
1912 ress.dstBufferSize = LZ4IO_dBufferSize;
1913 ress.dstBuffer = malloc(ress.dstBufferSize);
1914 if (!ress.srcBuffer || !ress.dstBuffer) END_PROCESS(61, "Allocation error : not enough memory");
1915
1916 LZ4IO_loadDDict(&ress, prefs);
1917
1918 ress.dstFile = NULL;
1919 return ress;
1920 }
1921
/* LZ4IO_freeDResources() :
 * release the decompression context, then the buffers owned by @ress.
 * dstFile is not closed here.
 * Failing to release the context terminates the process. */
static void LZ4IO_freeDResources(dRess_t ress)
{
    LZ4F_errorCode_t const freeStatus = LZ4F_freeDecompressionContext(ress.dCtx);
    if (LZ4F_isError(freeStatus)) {
        END_PROCESS(69, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(freeStatus));
    }
    free(ress.dictBuffer);
    free(ress.dstBuffer);
    free(ress.srcBuffer);
}
1930
1931
1932 #if LZ4IO_MULTITHREAD
1933
#define INBUFF_SIZE (4 MB)
#define OUTBUFF_SIZE (1 * INBUFF_SIZE)
#define OUTBUFF_QUEUE 1
#define PBUFFERS_NB (1 /* being decompressed */ + OUTBUFF_QUEUE + 1 /* being written to io */)

/* Buffer:
 * a memory area (@ptr, @capacity), and the amount of data it currently holds (@size).
 * Passed by value : a copy refers to the same memory area. */
typedef struct {
    void* ptr;
    size_t capacity;
    size_t size;
} Buffer;

/* BufferPool:
 * Based on asyncio property :
 * all buffers are allocated and released in order,
 * maximum nb of buffers limited by queues.
 * @availNext counts buffers handed out so far, @usedIdx counts buffers released so far;
 * both select a slot modulo PBUFFERS_NB. */
typedef struct {
    Buffer buffers[PBUFFERS_NB];
    int availNext;
    int usedIdx;
} BufferPool;

/* LZ4IO_freeBufferPool() :
 * release all buffers, then the pool itself. Accepts NULL. */
static void LZ4IO_freeBufferPool(BufferPool* bp)
{
    if (bp != NULL) {
        int n = 0;
        while (n < PBUFFERS_NB) {
            free(bp->buffers[n].ptr);
            n++;
        }
        free(bp);
    }
}

/* LZ4IO_createBufferPool() :
 * create a pool of PBUFFERS_NB buffers, each one of capacity @bufSize.
 * @return : the pool, or NULL if any allocation fails (nothing is leaked in this case). */
static BufferPool* LZ4IO_createBufferPool(size_t bufSize)
{
    int n;
    BufferPool* const pool = (BufferPool*)calloc(1, sizeof(*pool));
    if (pool == NULL) return NULL;

    for (n = 0; n < PBUFFERS_NB; n++) {
        Buffer* const slot = &pool->buffers[n];
        slot->ptr = malloc(bufSize);
        if (slot->ptr == NULL) {
            /* slots not yet allocated are still NULL thanks to calloc() */
            LZ4IO_freeBufferPool(pool);
            return NULL;
        }
        slot->capacity = bufSize;
        slot->size = 0;
    }
    pool->availNext = 0;
    pool->usedIdx = 0;
    return pool;
}

/* Note: Thread Sanitizer can be detected with below macro
 * but it's not guaranteed (doesn't seem to work with clang) */
#ifdef __SANITIZE_THREAD__
# undef LZ4IO_NO_TSAN_ONLY
#endif

/* BufPool_getBuffer() :
 * hand out the next buffer of the pool, in rotation.
 * @return : a copy of the buffer descriptor (its @size is expected to be 0). */
static Buffer BufPool_getBuffer(BufferPool* bp)
{
    int slot;
    assert(bp != NULL);
#ifdef LZ4IO_NO_TSAN_ONLY
    /* The following assert() are susceptible to race conditions */
    assert(bp->availNext >= bp->usedIdx);
    assert(bp->availNext < bp->usedIdx + PBUFFERS_NB);
#endif
    slot = bp->availNext % PBUFFERS_NB;
    bp->availNext++;
    assert(bp->buffers[slot].size == 0);
    return bp->buffers[slot];
}

/* BufPool_releaseBuffer() :
 * give back the oldest buffer still in use.
 * @buf must be that buffer : buffers are released in the order they were handed out. */
void BufPool_releaseBuffer(BufferPool* bp, Buffer buf)
{
    int slot;
    assert(bp != NULL);
#ifdef LZ4IO_NO_TSAN_ONLY
    /* The following assert() is susceptible to race conditions */
    assert(bp->usedIdx < bp->availNext);
#endif
    slot = bp->usedIdx % PBUFFERS_NB;
    bp->usedIdx++;
    assert(bp->buffers[slot].ptr == buf.ptr);
    bp->buffers[slot].size = 0;
}
2014
/* LZ4FChunkToWrite:
 * job descriptor consumed by LZ4IO_writeDecodedLZ4FChunk().
 * Heap-allocated by the decoding job, released by the write job. */
typedef struct {
    Buffer bufOut;                  /* decoded data to write (bufOut.size bytes) */
    FILE* fOut;                     /* destination */
    BufferPool* bp;                 /* pool to which bufOut is given back after writing */
    int sparseEnable;               /* passed to LZ4IO_fwriteSparse() */
    unsigned* storedSkips;          /* sparse-write state, read and updated by the write job */
    unsigned long long* totalSize;  /* running total of written bytes, incremented by the write job */
} LZ4FChunkToWrite;
2023
LZ4IO_writeDecodedLZ4FChunk(void * arg)2024 static void LZ4IO_writeDecodedLZ4FChunk(void* arg)
2025 {
2026 LZ4FChunkToWrite* const ctw = (LZ4FChunkToWrite*)arg;
2027 assert(ctw != NULL);
2028
2029 /* note: works because only 1 thread */
2030 *ctw->storedSkips = LZ4IO_fwriteSparse(ctw->fOut, ctw->bufOut.ptr, ctw->bufOut.size, ctw->sparseEnable, *ctw->storedSkips); /* success or die */
2031 *ctw->totalSize += (unsigned long long)ctw->bufOut.size; /* note: works because only 1 thread */
2032 DISPLAYUPDATE(2, "\rDecompressed : %u MiB ", (unsigned)(ctw->totalSize[0] >> 20));
2033
2034 /* clean up */
2035 BufPool_releaseBuffer(ctw->bp, ctw->bufOut);
2036 free(ctw);
2037 }
2038
/* LZ4FChunk:
 * job descriptor consumed by LZ4IO_decompressLZ4FChunk().
 * Heap-allocated by the reader, released by the decoding job. */
typedef struct {
    LZ4F_dctx* dctx;                /* decompression context, carried from one chunk to the next */
    const void* inBuffer;           /* compressed data */
    size_t inSize;                  /* size of compressed data, in bytes */
    const void* dictBuffer;         /* dictionary passed to the decoder */
    size_t dictBufferSize;          /* size of dictBuffer */
    BufferPool* bp;                 /* provides output buffers */
    unsigned long long* totalSize;  /* forwarded to write jobs */
    LZ4F_errorCode_t* lastStatus;   /* receives the result of each decoding call */
    TPool* wPool;                   /* pool receiving write jobs */
    FILE* foutput;                  /* forwarded to write jobs */
    int sparseEnable;               /* forwarded to write jobs */
    unsigned* storedSkips;          /* forwarded to write jobs */
} LZ4FChunk;
2053
LZ4IO_decompressLZ4FChunk(void * arg)2054 static void LZ4IO_decompressLZ4FChunk(void* arg)
2055 {
2056 LZ4FChunk* const lz4fc = (LZ4FChunk*)arg;
2057 const char* inPtr = (const char*)lz4fc->inBuffer;
2058 size_t pos = 0;
2059
2060 while ((pos < lz4fc->inSize)) { /* still to read */
2061 size_t remainingInSize = lz4fc->inSize - pos;
2062 Buffer b = BufPool_getBuffer(lz4fc->bp);
2063 if (b.capacity != OUTBUFF_SIZE)
2064 END_PROCESS(33, "Could not allocate output buffer!");
2065 assert(b.size == 0);
2066 b.size = b.capacity;
2067 { size_t nextToLoad = LZ4F_decompress_usingDict(lz4fc->dctx,
2068 b.ptr, &b.size,
2069 inPtr + pos, &remainingInSize,
2070 lz4fc->dictBuffer, lz4fc->dictBufferSize,
2071 NULL);
2072 if (LZ4F_isError(nextToLoad))
2073 END_PROCESS(34, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
2074 *lz4fc->lastStatus = nextToLoad;
2075 }
2076 assert(remainingInSize <= lz4fc->inSize - pos);
2077 pos += remainingInSize;
2078 assert(b.size <= b.capacity);
2079
2080 /* push to write thread */
2081 { LZ4FChunkToWrite* const ctw = (LZ4FChunkToWrite*)malloc(sizeof(*ctw));
2082 if (ctw==NULL) {
2083 END_PROCESS(35, "Allocation error : can't describe new write job");
2084 }
2085 ctw->bufOut = b;
2086 ctw->fOut = lz4fc->foutput;
2087 ctw->bp = lz4fc->bp;
2088 ctw->sparseEnable = lz4fc->sparseEnable;
2089 ctw->storedSkips = lz4fc->storedSkips;
2090 ctw->totalSize = lz4fc->totalSize;
2091 TPool_submitJob(lz4fc->wPool, LZ4IO_writeDecodedLZ4FChunk, ctw);
2092 }
2093 }
2094
2095 /* clean up */
2096 free(lz4fc);
2097 }
2098
2099 static unsigned long long
LZ4IO_decompressLZ4F(dRess_t ress,FILE * const srcFile,FILE * const dstFile,const LZ4IO_prefs_t * const prefs)2100 LZ4IO_decompressLZ4F(dRess_t ress,
2101 FILE* const srcFile, FILE* const dstFile,
2102 const LZ4IO_prefs_t* const prefs)
2103 {
2104 unsigned long long filesize = 0;
2105 LZ4F_errorCode_t nextToLoad;
2106 LZ4F_errorCode_t lastStatus = 1;
2107 unsigned storedSkips = 0;
2108 LZ4F_decompressOptions_t const dOpt_skipCrc = { 0, 1, 0, 0 };
2109 const LZ4F_decompressOptions_t* const dOptPtr =
2110 ((prefs->blockChecksum==0) && (prefs->streamChecksum==0)) ?
2111 &dOpt_skipCrc : NULL;
2112 TPool* const tPool = TPool_create(1, 1);
2113 TPool* const wPool = TPool_create(1, 1);
2114 BufferPool* const bp = LZ4IO_createBufferPool(OUTBUFF_SIZE);
2115 #define NB_BUFFSETS 4 /* 1 being read, 1 being processed, 1 being written, 1 being queued */
2116 void* inBuffs[NB_BUFFSETS];
2117 int bSetNb;
2118
2119 /* checks */
2120 if (tPool == NULL || wPool == NULL || bp==NULL)
2121 END_PROCESS(22, "threadpool creation error ");
2122
2123 /* allocate buffers up front */
2124 for (bSetNb=0; bSetNb<NB_BUFFSETS; bSetNb++) {
2125 inBuffs[bSetNb] = malloc((size_t)INBUFF_SIZE);
2126 if (!inBuffs[bSetNb])
2127 END_PROCESS(23, "Allocation error : can't allocate buffer for legacy decoding");
2128 }
2129
2130 /* Init feed with magic number (already consumed from FILE* sFile) */
2131 { size_t inSize = MAGICNUMBER_SIZE;
2132 size_t outSize= 0;
2133 LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER);
2134 nextToLoad = LZ4F_decompress_usingDict(ress.dCtx,
2135 ress.dstBuffer, &outSize,
2136 ress.srcBuffer, &inSize,
2137 ress.dictBuffer, ress.dictBufferSize,
2138 dOptPtr); /* set it once, it's enough */
2139 if (LZ4F_isError(nextToLoad))
2140 END_PROCESS(23, "Header error : %s", LZ4F_getErrorName(nextToLoad));
2141 }
2142
2143 /* Main Loop */
2144 assert(nextToLoad);
2145 for (bSetNb = 0; ; bSetNb = (bSetNb+1) % NB_BUFFSETS) {
2146 size_t readSize;
2147
2148 /* Read input */
2149 readSize = fread(inBuffs[bSetNb], 1, INBUFF_SIZE, srcFile);
2150 if (ferror(srcFile)) END_PROCESS(26, "Read error");
2151
2152 /* push to decoding thread */
2153 { LZ4FChunk* const lbi = (LZ4FChunk*)malloc(sizeof(*lbi));
2154 if (lbi==NULL)
2155 END_PROCESS(25, "Allocation error : not enough memory to allocate job descriptor");
2156 lbi->dctx = ress.dCtx;
2157 lbi->inBuffer = inBuffs[bSetNb];
2158 lbi->inSize = readSize;
2159 lbi->dictBuffer = ress.dictBuffer;
2160 lbi->dictBufferSize = ress.dictBufferSize;
2161 lbi->bp = bp;
2162 lbi->wPool = wPool;
2163 lbi->totalSize = &filesize;
2164 lbi->lastStatus = &lastStatus;
2165 lbi->foutput = dstFile;
2166 lbi->sparseEnable = prefs->sparseFileSupport;
2167 lbi->storedSkips = &storedSkips;
2168 TPool_submitJob(tPool, LZ4IO_decompressLZ4FChunk, lbi);
2169 }
2170 if (readSize < INBUFF_SIZE) break; /* likely reached end of stream */
2171 }
2172 assert(feof(srcFile));
2173
2174 /* Wait for all decompression completion */
2175 TPool_jobsCompleted(tPool);
2176
2177 /* flush */
2178 if (lastStatus != 0) {
2179 END_PROCESS(26, "LZ4F frame decoding could not complete: invalid frame");
2180 }
2181 TPool_jobsCompleted(wPool);
2182 if (!prefs->testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips);
2183
2184 /* Clean */
2185 for (bSetNb=0; bSetNb<NB_BUFFSETS; bSetNb++) {
2186 free(inBuffs[bSetNb]);
2187 }
2188 LZ4IO_freeBufferPool(bp);
2189 TPool_free(wPool);
2190 TPool_free(tPool);
2191
2192 return filesize;
2193 }
2194
2195 #else
2196
2197 static unsigned long long
LZ4IO_decompressLZ4F(dRess_t ress,FILE * const srcFile,FILE * const dstFile,const LZ4IO_prefs_t * const prefs)2198 LZ4IO_decompressLZ4F(dRess_t ress,
2199 FILE* const srcFile, FILE* const dstFile,
2200 const LZ4IO_prefs_t* const prefs)
2201 {
2202 unsigned long long filesize = 0;
2203 LZ4F_errorCode_t nextToLoad;
2204 unsigned storedSkips = 0;
2205 LZ4F_decompressOptions_t const dOpt_skipCrc = { 0, 1, 0, 0 };
2206 const LZ4F_decompressOptions_t* const dOptPtr =
2207 ((prefs->blockChecksum==0) && (prefs->streamChecksum==0)) ?
2208 &dOpt_skipCrc : NULL;
2209
2210 /* Init feed with magic number (already consumed from FILE* sFile) */
2211 { size_t inSize = MAGICNUMBER_SIZE;
2212 size_t outSize= 0;
2213 LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER);
2214 nextToLoad = LZ4F_decompress_usingDict(ress.dCtx,
2215 ress.dstBuffer, &outSize,
2216 ress.srcBuffer, &inSize,
2217 ress.dictBuffer, ress.dictBufferSize,
2218 dOptPtr); /* set it once, it's enough */
2219 if (LZ4F_isError(nextToLoad))
2220 END_PROCESS(62, "Header error : %s", LZ4F_getErrorName(nextToLoad));
2221 }
2222
2223 /* Main Loop */
2224 for (;nextToLoad;) {
2225 size_t readSize;
2226 size_t pos = 0;
2227 size_t decodedBytes = ress.dstBufferSize;
2228
2229 /* Read input */
2230 if (nextToLoad > ress.srcBufferSize) nextToLoad = ress.srcBufferSize;
2231 readSize = fread(ress.srcBuffer, 1, nextToLoad, srcFile);
2232 if (!readSize) break; /* reached end of file or stream */
2233
2234 while ((pos < readSize) || (decodedBytes == ress.dstBufferSize)) { /* still to read, or still to flush */
2235 /* Decode Input (at least partially) */
2236 size_t remaining = readSize - pos;
2237 decodedBytes = ress.dstBufferSize;
2238 nextToLoad = LZ4F_decompress_usingDict(ress.dCtx,
2239 ress.dstBuffer, &decodedBytes,
2240 (char*)(ress.srcBuffer)+pos, &remaining,
2241 ress.dictBuffer, ress.dictBufferSize,
2242 NULL);
2243 if (LZ4F_isError(nextToLoad))
2244 END_PROCESS(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
2245 pos += remaining;
2246
2247 /* Write Block */
2248 if (decodedBytes) {
2249 if (!prefs->testMode)
2250 storedSkips = LZ4IO_fwriteSparse(dstFile, ress.dstBuffer, decodedBytes, prefs->sparseFileSupport, storedSkips);
2251 filesize += decodedBytes;
2252 DISPLAYUPDATE(2, "\rDecompressed : %u MiB ", (unsigned)(filesize>>20));
2253 }
2254
2255 if (!nextToLoad) break;
2256 }
2257 }
2258 /* can be out because readSize == 0, which could be an fread() error */
2259 if (ferror(srcFile)) END_PROCESS(67, "Read error");
2260
2261 if (!prefs->testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips);
2262 if (nextToLoad!=0)
2263 END_PROCESS(68, "Unfinished stream (nextToLoad=%u)", (unsigned)nextToLoad);
2264
2265 return filesize;
2266 }
2267
2268 #endif /* LZ4IO_MULTITHREAD */
2269
2270 /* LZ4IO_passThrough:
2271 * just output the same content as input, no decoding.
2272 * This is a capability of zcat, and by extension lz4cat
2273 * MNstore : contain the first MAGICNUMBER_SIZE bytes already read from finput
2274 */
2275 #define PTSIZE (64 KB)
2276 #define PTSIZET (PTSIZE / sizeof(size_t))
2277 static unsigned long long
LZ4IO_passThrough(FILE * finput,FILE * foutput,unsigned char MNstore[MAGICNUMBER_SIZE],int sparseFileSupport)2278 LZ4IO_passThrough(FILE* finput, FILE* foutput,
2279 unsigned char MNstore[MAGICNUMBER_SIZE],
2280 int sparseFileSupport)
2281 {
2282 size_t buffer[PTSIZET];
2283 size_t readBytes = 1;
2284 unsigned long long total = MAGICNUMBER_SIZE;
2285 unsigned storedSkips = 0;
2286
2287 if (fwrite(MNstore, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE) {
2288 END_PROCESS(50, "Pass-through write error");
2289 }
2290 while (readBytes) {
2291 readBytes = fread(buffer, 1, sizeof(buffer), finput);
2292 total += readBytes;
2293 storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, sparseFileSupport, storedSkips);
2294 }
2295 if (ferror(finput)) END_PROCESS(51, "Read Error");
2296
2297 LZ4IO_fwriteSparseEnd(foutput, storedSkips);
2298 return total;
2299 }
2300
2301 /* when fseek() doesn't work (pipe scenario),
2302 * read and forget from input.
2303 **/
2304 #define SKIP_BUFF_SIZE (16 KB)
skipStream(FILE * f,unsigned offset)2305 static int skipStream(FILE* f, unsigned offset)
2306 {
2307 char buf[SKIP_BUFF_SIZE];
2308 while (offset > 0) {
2309 size_t const tr = MIN(offset, sizeof(buf));
2310 size_t const r = fread(buf, 1, tr, f);
2311 if (r != tr) return 1; /* error reading f */
2312 offset -= (unsigned)tr;
2313 }
2314 assert(offset == 0);
2315 return 0;
2316 }
2317
/** Safely handle cases when (unsigned)offset > LONG_MAX
 *  Moves forward by steps of at most 1 GB.
 *  If a seek fails, the remaining bytes are consumed by reading them instead.
 *  @where : only SEEK_CUR is supported, anything else returns -1.
 *  @return : 0 on success, non-zero on failure. */
static int fseek_u32(FILE *fp, unsigned offset, int where)
{
    const unsigned stepMax = 1U << 30;
    int errorNb = 0;

    if (where != SEEK_CUR) return -1;   /* Only allows SEEK_CUR */

    while (offset > 0) {
        unsigned const step = (offset > stepMax) ? stepMax : offset;
        errorNb = UTIL_fseek(fp, (long)step, SEEK_CUR);
        if (errorNb != 0) {
            /* seeking is not possible : read and discard whatever remains */
            return skipStream(fp, offset);
        }
        offset -= step;
    }
    return errorNb;
}
2335
2336
2337 #define ENDOFSTREAM ((unsigned long long)-1)
2338 #define DECODING_ERROR ((unsigned long long)-2)
2339 static unsigned long long
selectDecoder(dRess_t ress,FILE * finput,FILE * foutput,const LZ4IO_prefs_t * const prefs)2340 selectDecoder(dRess_t ress,
2341 FILE* finput, FILE* foutput,
2342 const LZ4IO_prefs_t* const prefs)
2343 {
2344 unsigned char MNstore[MAGICNUMBER_SIZE];
2345 unsigned magicNumber;
2346 static unsigned nbFrames = 0;
2347
2348 /* init */
2349 nbFrames++;
2350
2351 /* Check Archive Header */
2352 if (g_magicRead) { /* magic number already read from finput (see legacy frame)*/
2353 magicNumber = g_magicRead;
2354 g_magicRead = 0;
2355 } else {
2356 size_t const nbReadBytes = fread(MNstore, 1, MAGICNUMBER_SIZE, finput);
2357 if (nbReadBytes==0) { nbFrames = 0; return ENDOFSTREAM; } /* EOF */
2358 if (nbReadBytes != MAGICNUMBER_SIZE)
2359 END_PROCESS(40, "Unrecognized header : Magic Number unreadable");
2360 magicNumber = LZ4IO_readLE32(MNstore); /* Little Endian format */
2361 }
2362 if (LZ4IO_isSkippableMagicNumber(magicNumber))
2363 magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */
2364
2365 switch(magicNumber)
2366 {
2367 case LZ4IO_MAGICNUMBER:
2368 return LZ4IO_decompressLZ4F(ress, finput, foutput, prefs);
2369 case LEGACY_MAGICNUMBER:
2370 DISPLAYLEVEL(4, "Detected : Legacy format \n");
2371 return LZ4IO_decodeLegacyStream(finput, foutput, prefs);
2372 case LZ4IO_SKIPPABLE0:
2373 DISPLAYLEVEL(4, "Skipping detected skippable area \n");
2374 { size_t const nbReadBytes = fread(MNstore, 1, 4, finput);
2375 if (nbReadBytes != 4)
2376 END_PROCESS(42, "Stream error : skippable size unreadable");
2377 }
2378 { unsigned const size = LZ4IO_readLE32(MNstore);
2379 int const errorNb = fseek_u32(finput, size, SEEK_CUR);
2380 if (errorNb != 0)
2381 END_PROCESS(43, "Stream error : cannot skip skippable area");
2382 }
2383 return 0;
2384 default:
2385 if (nbFrames == 1) { /* just started */
2386 /* Wrong magic number at the beginning of 1st stream */
2387 if (!prefs->testMode && prefs->overwrite && prefs->passThrough) {
2388 nbFrames = 0;
2389 return LZ4IO_passThrough(finput, foutput, MNstore, prefs->sparseFileSupport);
2390 }
2391 END_PROCESS(44,"Unrecognized header : file cannot be decoded");
2392 }
2393 { long int const position = ftell(finput); /* only works for files < 2 GB */
2394 DISPLAYLEVEL(2, "Stream followed by undecodable data ");
2395 if (position != -1L)
2396 DISPLAYLEVEL(2, "at position %i ", (int)position);
2397 DISPLAYLEVEL(2, "\n");
2398 }
2399 return DECODING_ERROR;
2400 }
2401 }
2402
2403
2404 static int
LZ4IO_decompressSrcFile(unsigned long long * outGenSize,dRess_t ress,const char * input_filename,const char * output_filename,const LZ4IO_prefs_t * const prefs)2405 LZ4IO_decompressSrcFile(unsigned long long* outGenSize,
2406 dRess_t ress,
2407 const char* input_filename, const char* output_filename,
2408 const LZ4IO_prefs_t* const prefs)
2409 {
2410 FILE* const foutput = ress.dstFile;
2411 unsigned long long filesize = 0;
2412 int result = 0;
2413
2414 /* Init */
2415 FILE* const finput = LZ4IO_openSrcFile(input_filename);
2416 if (finput==NULL) return 1;
2417 assert(foutput != NULL);
2418
2419 /* Loop over multiple streams */
2420 for ( ; ; ) { /* endless loop, see break condition */
2421 unsigned long long const decodedSize =
2422 selectDecoder(ress, finput, foutput, prefs);
2423 if (decodedSize == ENDOFSTREAM) break;
2424 if (decodedSize == DECODING_ERROR) { result=1; break; }
2425 filesize += decodedSize;
2426 }
2427
2428 /* Close input */
2429 fclose(finput);
2430 if (prefs->removeSrcFile) { /* --rm */
2431 if (remove(input_filename))
2432 END_PROCESS(45, "Remove error : %s: %s", input_filename, strerror(errno));
2433 }
2434
2435 /* Final Status */
2436 DISPLAYLEVEL(2, "\r%79s\r", "");
2437 DISPLAYLEVEL(2, "%-30.30s : decoded %llu bytes \n", input_filename, filesize);
2438 *outGenSize = filesize;
2439 (void)output_filename;
2440
2441 return result;
2442 }
2443
2444
2445 static int
LZ4IO_decompressDstFile(unsigned long long * outGenSize,dRess_t ress,const char * input_filename,const char * output_filename,const LZ4IO_prefs_t * const prefs)2446 LZ4IO_decompressDstFile(unsigned long long* outGenSize,
2447 dRess_t ress,
2448 const char* input_filename,
2449 const char* output_filename,
2450 const LZ4IO_prefs_t* const prefs)
2451 {
2452 int result;
2453 stat_t statbuf;
2454 int stat_result = 0;
2455 FILE* const foutput = LZ4IO_openDstFile(output_filename, prefs);
2456 if (foutput==NULL) return 1; /* failure */
2457
2458 if ( !LZ4IO_isStdin(input_filename)
2459 && UTIL_getFileStat(input_filename, &statbuf))
2460 stat_result = 1;
2461
2462 ress.dstFile = foutput;
2463 result = LZ4IO_decompressSrcFile(outGenSize, ress, input_filename, output_filename, prefs);
2464
2465 fclose(foutput);
2466
2467 /* Copy owner, file permissions and modification time */
2468 if ( stat_result != 0
2469 && !LZ4IO_isStdout(output_filename)
2470 && !LZ4IO_isDevNull(output_filename)) {
2471 UTIL_setFileStat(output_filename, &statbuf);
2472 /* should return value be read ? or is silent fail good enough ? */
2473 }
2474
2475 return result;
2476 }
2477
2478
2479 /* Note : LZ4IO_decompressFilename()
2480 * can provide total decompression time for the specified fileName.
2481 * This information is not available with LZ4IO_decompressMultipleFilenames().
2482 */
LZ4IO_decompressFilename(const char * input_filename,const char * output_filename,const LZ4IO_prefs_t * prefs)2483 int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename, const LZ4IO_prefs_t* prefs)
2484 {
2485 dRess_t const ress = LZ4IO_createDResources(prefs);
2486 TIME_t const timeStart = TIME_getTime();
2487 clock_t const cpuStart = clock();
2488 unsigned long long processed = 0;
2489
2490 int const errStat = LZ4IO_decompressDstFile(&processed, ress, input_filename, output_filename, prefs);
2491 if (errStat)
2492 LZ4IO_finalTimeDisplay(timeStart, cpuStart, processed);
2493 LZ4IO_freeDResources(ress);
2494 return errStat;
2495 }
2496
2497
LZ4IO_decompressMultipleFilenames(const char ** inFileNamesTable,int ifntSize,const char * suffix,const LZ4IO_prefs_t * prefs)2498 int LZ4IO_decompressMultipleFilenames(
2499 const char** inFileNamesTable, int ifntSize,
2500 const char* suffix,
2501 const LZ4IO_prefs_t* prefs)
2502 {
2503 int i;
2504 unsigned long long totalProcessed = 0;
2505 int skippedFiles = 0;
2506 int missingFiles = 0;
2507 char* outFileName = (char*)malloc(FNSPACE);
2508 size_t ofnSize = FNSPACE;
2509 size_t const suffixSize = strlen(suffix);
2510 dRess_t ress = LZ4IO_createDResources(prefs);
2511 TIME_t timeStart = TIME_getTime();
2512 clock_t cpuStart = clock();
2513
2514 if (outFileName==NULL) END_PROCESS(70, "Memory allocation error");
2515 if (prefs->blockChecksum==0 && prefs->streamChecksum==0) {
2516 DISPLAYLEVEL(4, "disabling checksum validation during decoding \n");
2517 }
2518 ress.dstFile = LZ4IO_openDstFile(stdoutmark, prefs);
2519
2520 for (i=0; i<ifntSize; i++) {
2521 unsigned long long processed = 0;
2522 size_t const ifnSize = strlen(inFileNamesTable[i]);
2523 const char* const suffixPtr = inFileNamesTable[i] + ifnSize - suffixSize;
2524 if (LZ4IO_isStdout(suffix) || LZ4IO_isDevNull(suffix)) {
2525 missingFiles += LZ4IO_decompressSrcFile(&processed, ress, inFileNamesTable[i], suffix, prefs);
2526 totalProcessed += processed;
2527 continue;
2528 }
2529 if (ofnSize <= ifnSize-suffixSize+1) {
2530 free(outFileName);
2531 ofnSize = ifnSize + 20;
2532 outFileName = (char*)malloc(ofnSize);
2533 if (outFileName==NULL) END_PROCESS(71, "Memory allocation error");
2534 }
2535 if (ifnSize <= suffixSize || !UTIL_sameString(suffixPtr, suffix) ) {
2536 DISPLAYLEVEL(1, "File extension doesn't match expected LZ4_EXTENSION (%4s); will not process file: %s\n", suffix, inFileNamesTable[i]);
2537 skippedFiles++;
2538 continue;
2539 }
2540 memcpy(outFileName, inFileNamesTable[i], ifnSize - suffixSize);
2541 outFileName[ifnSize-suffixSize] = '\0';
2542 missingFiles += LZ4IO_decompressDstFile(&processed, ress, inFileNamesTable[i], outFileName, prefs);
2543 totalProcessed += processed;
2544 }
2545
2546 LZ4IO_freeDResources(ress);
2547 free(outFileName);
2548 LZ4IO_finalTimeDisplay(timeStart, cpuStart, totalProcessed);
2549 return missingFiles + skippedFiles;
2550 }
2551
2552
2553 /* ********************************************************************* */
2554 /* ********************** LZ4 --list command *********************** */
2555 /* ********************************************************************* */
2556
/* LZ4IO_frameType_t:
 * kind of frame found in a compressed file.
 * Values start at 0 and are consecutive;
 * LZ4IO_frameTypeNames[] lists display names in the same order. */
typedef enum
{
    lz4Frame = 0,
    legacyFrame,
    skippableFrame
} LZ4IO_frameType_t;
2563
/* LZ4IO_frameInfo_t:
 * description of one frame : its kind, plus LZ4F frame parameters.
 * NOTE(review): lz4FrameInfo is presumably only meaningful when frameType == lz4Frame — confirm against users. */
typedef struct {
    LZ4F_frameInfo_t lz4FrameInfo;
    LZ4IO_frameType_t frameType;
} LZ4IO_frameInfo_t;

/* default value : default LZ4F frame parameters, frame kind set to lz4Frame */
#define LZ4IO_INIT_FRAMEINFO { LZ4F_INIT_FRAMEINFO, lz4Frame }
2570
/* LZ4IO_cFileInfo_t:
 * summary of one compressed file, as collected for the --list command.
 * NOTE(review): the 3 last fields look like boolean flags
 * ("all frames share the same type / same block type / all declare a content size"),
 * starting at 1 and presumably cleared when a counter-example is found — confirm against users. */
typedef struct {
    const char* fileName;
    unsigned long long fileSize;
    unsigned long long frameCount;
    LZ4IO_frameInfo_t frameSummary;
    unsigned short eqFrameTypes;
    unsigned short eqBlockTypes;
    unsigned short allContentSize;
} LZ4IO_cFileInfo_t;

/* default value : no name, sizes and counters at 0, default frame summary, the 3 flags set to 1 */
#define LZ4IO_INIT_CFILEINFO { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 }
2582
/* LZ4IO_infoResult:
 * status codes; by name : success, unrecognized format, input is not a file.
 * NOTE(review): presumably the outcome of collecting information on one file for --list — confirm against users. */
typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult;

/* display names of frame types, listed in the same order as LZ4IO_frameType_t values */
static const char * LZ4IO_frameTypeNames[] = {"LZ4Frame", "LegacyFrame", "SkippableFrame" };
2586
2587 /* Read block headers and skip block data
2588 Return total blocks size for this frame including block headers,
2589 block checksums and content checksums.
2590 returns 0 in case it can't successfully skip block data.
2591 Assumes SEEK_CUR after frame header.
2592 */
2593 static unsigned long long
LZ4IO_skipBlocksData(FILE * finput,const LZ4F_blockChecksum_t blockChecksumFlag,const LZ4F_contentChecksum_t contentChecksumFlag)2594 LZ4IO_skipBlocksData(FILE* finput,
2595 const LZ4F_blockChecksum_t blockChecksumFlag,
2596 const LZ4F_contentChecksum_t contentChecksumFlag)
2597 {
2598 unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE];
2599 unsigned long long totalBlocksSize = 0;
2600 for (;;) {
2601 if (!fread(blockInfo, 1, LZ4F_BLOCK_HEADER_SIZE, finput)) {
2602 if (feof(finput)) return totalBlocksSize;
2603 return 0;
2604 }
2605 totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE;
2606 { const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU;
2607 const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE);
2608 if (nextCBlockSize == 0) {
2609 /* Reached EndMark */
2610 if (contentChecksumFlag) {
2611 /* Skip content checksum */
2612 if (UTIL_fseek(finput, LZ4F_CONTENT_CHECKSUM_SIZE, SEEK_CUR) != 0) {
2613 return 0;
2614 }
2615 totalBlocksSize += LZ4F_CONTENT_CHECKSUM_SIZE;
2616 }
2617 break;
2618 }
2619 totalBlocksSize += nextBlock;
2620 /* skip to the next block */
2621 assert(nextBlock < LONG_MAX);
2622 if (UTIL_fseek(finput, (long)nextBlock, SEEK_CUR) != 0) return 0;
2623 } }
2624 return totalBlocksSize;
2625 }
2626
2627 static const unsigned long long legacyFrameUndecodable = (0ULL-1);
2628 /* For legacy frames only.
2629 Read block headers and skip block data.
2630 Return total blocks size for this frame including block headers.
2631 or legacyFrameUndecodable in case it can't successfully skip block data.
2632 This works as long as legacy block header size = magic number size.
2633 Assumes SEEK_CUR after frame header.
2634 */
LZ4IO_skipLegacyBlocksData(FILE * finput)2635 static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput)
2636 {
2637 unsigned char blockInfo[LZ4IO_LEGACY_BLOCK_HEADER_SIZE];
2638 unsigned long long totalBlocksSize = 0;
2639 LZ4IO_STATIC_ASSERT(LZ4IO_LEGACY_BLOCK_HEADER_SIZE == MAGICNUMBER_SIZE);
2640 for (;;) {
2641 size_t const bhs = fread(blockInfo, 1, LZ4IO_LEGACY_BLOCK_HEADER_SIZE, finput);
2642 if (bhs == 0) {
2643 if (feof(finput)) return totalBlocksSize;
2644 return legacyFrameUndecodable;
2645 }
2646 if (bhs != 4) {
2647 return legacyFrameUndecodable;
2648 }
2649 { const unsigned int nextCBlockSize = LZ4IO_readLE32(&blockInfo);
2650 if ( nextCBlockSize == LEGACY_MAGICNUMBER
2651 || nextCBlockSize == LZ4IO_MAGICNUMBER
2652 || LZ4IO_isSkippableMagicNumber(nextCBlockSize) ) {
2653 /* Rewind back. we want cursor at the beginning of next frame */
2654 if (UTIL_fseek(finput, -LZ4IO_LEGACY_BLOCK_HEADER_SIZE, SEEK_CUR) != 0) {
2655 END_PROCESS(37, "impossible to skip backward");
2656 }
2657 break;
2658 }
2659 if (nextCBlockSize > LZ4IO_LEGACY_BLOCK_SIZE_MAX) {
2660 DISPLAYLEVEL(4, "Error : block in legacy frame is too large \n");
2661 return legacyFrameUndecodable;
2662 }
2663 totalBlocksSize += LZ4IO_LEGACY_BLOCK_HEADER_SIZE + nextCBlockSize;
2664 /* skip to the next block
2665 * note : this won't fail if nextCBlockSize is too large, skipping past the end of finput */
2666 if (UTIL_fseek(finput, nextCBlockSize, SEEK_CUR) != 0) {
2667 return legacyFrameUndecodable;
2668 } } }
2669 return totalBlocksSize;
2670 }
2671
2672 /* LZ4IO_blockTypeID:
2673 * return human-readable block type, following command line convention
2674 * buffer : must be a valid memory area of at least 4 bytes */
LZ4IO_blockTypeID(LZ4F_blockSizeID_t sizeID,LZ4F_blockMode_t blockMode,char buffer[4])2675 const char* LZ4IO_blockTypeID(LZ4F_blockSizeID_t sizeID, LZ4F_blockMode_t blockMode, char buffer[4])
2676 {
2677 buffer[0] = 'B';
2678 assert(sizeID >= 4); assert(sizeID <= 7);
2679 buffer[1] = (char)(sizeID + '0');
2680 buffer[2] = (blockMode == LZ4F_blockIndependent) ? 'I' : 'D';
2681 buffer[3] = 0;
2682 return buffer;
2683 }
2684
/* LZ4IO_toHuman:
 * format `size` with 2 decimals, scaled down by powers of 1024 and followed
 * by a unit letter (none below 1024, then K, M, G, T, P, E, Z, Y).
 * Scaling stops at 'Y'; beyond that the printed magnitude saturates at
 * 99999.99 so that the result always fits the buffer.
 * size : expected to be non-negative (callers pass file / content sizes)
 * buf  : must be valid memory area of at least 10 bytes
 * @return : buf */
static const char* LZ4IO_toHuman(long double size, char* buf)
{
    const char units[] = {"\0KMGTPEZY"};
    /* sizeof also counts the string terminator, hence -2 to land on 'Y' */
    size_t const lastUnit = sizeof(units) - 2;
    size_t i = 0;
    /* never step past the last unit : the next index is the terminator
     * (unit silently dropped), and the one after is out of bounds */
    for (; size >= 1024 && i < lastUnit; i++) size /= 1024;
    /* "99999.99Y" + terminator is the longest text that fits 10 bytes */
    if (size > 99999.99L) size = 99999.99L;
    sprintf(buf, "%.2Lf%c", size, units[i]);
    return buf;
}
2694
/* LZ4IO_baseName:
 * return the part of `input_filename` following its last '/'.
 * When there is no '/', the last '\\' is used instead.
 * When neither is present, `input_filename` is returned unchanged.
 * The result points inside `input_filename` (no allocation). */
static const char* LZ4IO_baseName(const char* input_filename)
{
    const char* lastSep = strrchr(input_filename, '/');
    if (lastSep == NULL) {
        /* no forward slash : fall back to backslash-separated paths */
        lastSep = strrchr(input_filename, '\\');
    }
    return (lastSep != NULL) ? lastSep + 1 : input_filename;
}
2703
2704 /* Report frame/s information (--list) in verbose mode (-v).
2705 * Will populate file info with fileName and frameSummary where applicable.
2706 * - TODO :
2707 * + report nb of blocks, hence max. possible decompressed size (when not reported in header)
2708 */
2709 static LZ4IO_infoResult
LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t * cfinfo,const char * input_filename,int displayNow)2710 LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename, int displayNow)
2711 {
2712 LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */
2713 unsigned char buffer[LZ4F_HEADER_SIZE_MAX];
2714 FILE* const finput = LZ4IO_openSrcFile(input_filename);
2715
2716 if (finput == NULL) return LZ4IO_not_a_file;
2717 cfinfo->fileSize = UTIL_getOpenFileSize(finput);
2718
2719 while (!feof(finput)) {
2720 LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO;
2721 unsigned magicNumber;
2722 /* Get MagicNumber */
2723 { size_t const nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput);
2724 if (nbReadBytes == 0) { break; } /* EOF */
2725 result = LZ4IO_format_not_known; /* default result (error) */
2726 if (nbReadBytes != MAGICNUMBER_SIZE) {
2727 END_PROCESS(40, "Unrecognized header : Magic Number unreadable");
2728 } }
2729 magicNumber = LZ4IO_readLE32(buffer); /* Little Endian format */
2730 if (LZ4IO_isSkippableMagicNumber(magicNumber))
2731 magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */
2732
2733 switch (magicNumber) {
2734 case LZ4IO_MAGICNUMBER:
2735 if (cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0;
2736 /* Get frame info */
2737 { const size_t readBytes = fread(buffer + MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN - MAGICNUMBER_SIZE, finput);
2738 if (!readBytes || ferror(finput)) END_PROCESS(71, "Error reading %s", input_filename);
2739 }
2740 { size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN);
2741 if (LZ4F_isError(hSize)) break;
2742 if (hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)) {
2743 /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/
2744 const size_t readBytes = fread(buffer + LZ4F_HEADER_SIZE_MIN, 1, hSize - LZ4F_HEADER_SIZE_MIN, finput);
2745 if (!readBytes || ferror(finput)) END_PROCESS(72, "Error reading %s", input_filename);
2746 }
2747 /* Create decompression context */
2748 { LZ4F_dctx* dctx;
2749 if ( LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION)) ) break;
2750 { unsigned const frameInfoError = LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize));
2751 LZ4F_freeDecompressionContext(dctx);
2752 if (frameInfoError) break;
2753 if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID ||
2754 cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode)
2755 && cfinfo->frameCount != 0)
2756 cfinfo->eqBlockTypes = 0;
2757 { const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput,
2758 frameInfo.lz4FrameInfo.blockChecksumFlag,
2759 frameInfo.lz4FrameInfo.contentChecksumFlag);
2760 if (totalBlocksSize) {
2761 char bTypeBuffer[5];
2762 LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer);
2763 if (displayNow) DISPLAYOUT(" %6llu %14s %5s %8s",
2764 cfinfo->frameCount + 1,
2765 LZ4IO_frameTypeNames[frameInfo.frameType],
2766 bTypeBuffer,
2767 frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-");
2768 if (frameInfo.lz4FrameInfo.contentSize) {
2769 double const ratio = (double)(totalBlocksSize + hSize) / (double)frameInfo.lz4FrameInfo.contentSize * 100;
2770 if (displayNow) DISPLAYOUT(" %20llu %20llu %9.2f%%\n",
2771 totalBlocksSize + hSize,
2772 frameInfo.lz4FrameInfo.contentSize,
2773 ratio);
2774 /* Now we've consumed frameInfo we can use it to store the total contentSize */
2775 frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize;
2776 }
2777 else {
2778 if (displayNow) DISPLAYOUT(" %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-");
2779 cfinfo->allContentSize = 0;
2780 }
2781 result = LZ4IO_LZ4F_OK;
2782 } } } } }
2783 break;
2784 case LEGACY_MAGICNUMBER:
2785 frameInfo.frameType = legacyFrame;
2786 if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0;
2787 cfinfo->eqBlockTypes = 0;
2788 cfinfo->allContentSize = 0;
2789 { const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput);
2790 if (totalBlocksSize == legacyFrameUndecodable) {
2791 DISPLAYLEVEL(1, "Corrupted legacy frame \n");
2792 result = LZ4IO_format_not_known;
2793 break;
2794 }
2795 if (totalBlocksSize) {
2796 if (displayNow) DISPLAYOUT(" %6llu %14s %5s %8s %20llu %20s %9s\n",
2797 cfinfo->frameCount + 1,
2798 LZ4IO_frameTypeNames[frameInfo.frameType],
2799 "-", "-",
2800 totalBlocksSize + 4,
2801 "-", "-");
2802 result = LZ4IO_LZ4F_OK;
2803 } }
2804 break;
2805 case LZ4IO_SKIPPABLE0:
2806 frameInfo.frameType = skippableFrame;
2807 if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0;
2808 cfinfo->eqBlockTypes = 0;
2809 cfinfo->allContentSize = 0;
2810 { size_t const nbReadBytes = fread(buffer, 1, 4, finput);
2811 if (nbReadBytes != 4)
2812 END_PROCESS(42, "Stream error : skippable size unreadable");
2813 }
2814 { unsigned const size = LZ4IO_readLE32(buffer);
2815 int const errorNb = fseek_u32(finput, size, SEEK_CUR);
2816 if (errorNb != 0)
2817 END_PROCESS(43, "Stream error : cannot skip skippable area");
2818 if (displayNow) DISPLAYOUT(" %6llu %14s %5s %8s %20u %20s %9s\n",
2819 cfinfo->frameCount + 1,
2820 "SkippableFrame",
2821 "-", "-", size + 8, "-", "-");
2822
2823 result = LZ4IO_LZ4F_OK;
2824 }
2825 break;
2826 default:
2827 { long int const position = ftell(finput); /* only works for files < 2 GB */
2828 DISPLAYLEVEL(3, "Stream followed by undecodable data ");
2829 if (position != -1L)
2830 DISPLAYLEVEL(3, "at position %i ", (int)position);
2831 result = LZ4IO_format_not_known;
2832 DISPLAYLEVEL(3, "\n");
2833 }
2834 break;
2835 }
2836 if (result != LZ4IO_LZ4F_OK) break;
2837 cfinfo->frameSummary = frameInfo;
2838 cfinfo->frameCount++;
2839 } /* while (!feof(finput)) */
2840 fclose(finput);
2841 return result;
2842 }
2843
2844
LZ4IO_displayCompressedFilesInfo(const char ** inFileNames,size_t ifnIdx)2845 int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx)
2846 {
2847 int result = 0;
2848 size_t idx = 0;
2849 if (g_displayLevel < 3) {
2850 DISPLAYOUT("%10s %14s %5s %11s %13s %8s %s\n",
2851 "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename");
2852 }
2853 for (; idx < ifnIdx; idx++) {
2854 /* Get file info */
2855 LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO;
2856 cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]);
2857 if (LZ4IO_isStdin(inFileNames[idx]) ? !UTIL_isRegFD(0) : !UTIL_isRegFile(inFileNames[idx])) {
2858 DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]);
2859 return 1;
2860 }
2861 if (g_displayLevel >= 3) {
2862 /* verbose mode */
2863 DISPLAYOUT("%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx + 1, (unsigned long long)ifnIdx);
2864 DISPLAYOUT(" %6s %14s %5s %8s %20s %20s %9s\n",
2865 "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio");
2866 }
2867 { LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx], g_displayLevel >= 3);
2868 if (op_result != LZ4IO_LZ4F_OK) {
2869 assert(op_result == LZ4IO_format_not_known);
2870 DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
2871 return 1;
2872 } }
2873 if (g_displayLevel >= 3) {
2874 DISPLAYOUT("\n");
2875 }
2876 if (g_displayLevel < 3) {
2877 /* Display summary */
2878 char buffers[3][10];
2879 DISPLAYOUT("%10llu %14s %5s %11s %13s ",
2880 cfinfo.frameCount,
2881 cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" ,
2882 cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID,
2883 cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-",
2884 LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]),
2885 cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-");
2886 if (cfinfo.allContentSize) {
2887 double const ratio = (double)cfinfo.fileSize / (double)cfinfo.frameSummary.lz4FrameInfo.contentSize * 100;
2888 DISPLAYOUT("%8.2f%% %s \n", ratio, cfinfo.fileName);
2889 } else {
2890 DISPLAYOUT("%8s %s\n",
2891 "-",
2892 cfinfo.fileName);
2893 } } /* if (g_displayLevel < 3) */
2894 } /* for (; idx < ifnIdx; idx++) */
2895
2896 return result;
2897 }
2898