1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "zbuild.h"
7#include "zutil_p.h"
8#include "gzguts.h"
9
10/* Local functions */
11static int gz_load(gz_state *, unsigned char *, unsigned, unsigned *);
12static int gz_avail(gz_state *);
13static int gz_look(gz_state *);
14static int gz_decomp(gz_state *);
15static int gz_fetch(gz_state *);
16static int gz_skip(gz_state *, z_off64_t);
17static size_t gz_read(gz_state *, void *, size_t);
18
19/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
20   state->fd, and update state->eof, state->err, and state->msg as appropriate.
21   This function needs to loop on read(), since read() is not guaranteed to
22   read the number of bytes requested, depending on the type of descriptor. */
23static int gz_load(gz_state *state, unsigned char *buf, unsigned len, unsigned *have) {
24    ssize_t ret;
25
26    *have = 0;
27    do {
28        ret = read(state->fd, buf + *have, len - *have);
29        if (ret <= 0)
30            break;
31        *have += (unsigned)ret;
32    } while (*have < len);
33    if (ret < 0) {
34        gz_error(state, Z_ERRNO, zstrerror());
35        return -1;
36    }
37    if (ret == 0)
38        state->eof = 1;
39    return 0;
40}
41
42/* Load up input buffer and set eof flag if last data loaded -- return -1 on
43   error, 0 otherwise.  Note that the eof flag is set when the end of the input
44   file is reached, even though there may be unused data in the buffer.  Once
45   that data has been used, no more attempts will be made to read the file.
46   If strm->avail_in != 0, then the current data is moved to the beginning of
47   the input buffer, and then the remainder of the buffer is loaded with the
48   available data from the input file. */
49static int gz_avail(gz_state *state) {
50    unsigned got;
51    PREFIX3(stream) *strm = &(state->strm);
52
53    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
54        return -1;
55    if (state->eof == 0) {
56        if (strm->avail_in) {       /* copy what's there to the start */
57            unsigned char *p = state->in;
58            unsigned const char *q = strm->next_in;
59            unsigned n = strm->avail_in;
60            do {
61                *p++ = *q++;
62            } while (--n);
63        }
64        if (gz_load(state, state->in + strm->avail_in, state->size - strm->avail_in, &got) == -1)
65            return -1;
66        strm->avail_in += got;
67        strm->next_in = state->in;
68    }
69    return 0;
70}
71
72/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
73   If this is the first time in, allocate required memory.  state->how will be
74   left unchanged if there is no more input data available, will be set to COPY
75   if there is no gzip header and direct copying will be performed, or it will
76   be set to GZIP for decompression.  If direct copying, then leftover input
77   data from the input buffer will be copied to the output buffer.  In that
78   case, all further file reads will be directly to either the output buffer or
79   a user buffer.  If decompressing, the inflate state will be initialized.
80   gz_look() will return 0 on success or -1 on failure. */
81static int gz_look(gz_state *state) {
82    PREFIX3(stream) *strm = &(state->strm);
83
84    /* allocate read buffers and inflate memory */
85    if (state->size == 0) {
86        /* allocate buffers */
87        state->in = (unsigned char *)zng_alloc(state->want);
88        state->out = (unsigned char *)zng_alloc(state->want << 1);
89        if (state->in == NULL || state->out == NULL) {
90            zng_free(state->out);
91            zng_free(state->in);
92            gz_error(state, Z_MEM_ERROR, "out of memory");
93            return -1;
94        }
95        state->size = state->want;
96
97        /* allocate inflate memory */
98        state->strm.zalloc = NULL;
99        state->strm.zfree = NULL;
100        state->strm.opaque = NULL;
101        state->strm.avail_in = 0;
102        state->strm.next_in = NULL;
103        if (PREFIX(inflateInit2)(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
104            zng_free(state->out);
105            zng_free(state->in);
106            state->size = 0;
107            gz_error(state, Z_MEM_ERROR, "out of memory");
108            return -1;
109        }
110    }
111
112    /* get at least the magic bytes in the input buffer */
113    if (strm->avail_in < 2) {
114        if (gz_avail(state) == -1)
115            return -1;
116        if (strm->avail_in == 0)
117            return 0;
118    }
119
120    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
121       a logical dilemma here when considering the case of a partially written
122       gzip file, to wit, if a single 31 byte is written, then we cannot tell
123       whether this is a single-byte file, or just a partially written gzip
124       file -- for here we assume that if a gzip file is being written, then
125       the header will be written in a single operation, so that reading a
126       single byte is sufficient indication that it is not a gzip file) */
127    if (strm->avail_in > 1 &&
128            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
129        PREFIX(inflateReset)(strm);
130        state->how = GZIP;
131        state->direct = 0;
132        return 0;
133    }
134
135    /* no gzip header -- if we were decoding gzip before, then this is trailing
136       garbage.  Ignore the trailing garbage and finish. */
137    if (state->direct == 0) {
138        strm->avail_in = 0;
139        state->eof = 1;
140        state->x.have = 0;
141        return 0;
142    }
143
144    /* doing raw i/o, copy any leftover input to output -- this assumes that
145       the output buffer is larger than the input buffer, which also assures
146       space for gzungetc() */
147    state->x.next = state->out;
148    if (strm->avail_in) {
149        memcpy(state->x.next, strm->next_in, strm->avail_in);
150        state->x.have = strm->avail_in;
151        strm->avail_in = 0;
152    }
153    state->how = COPY;
154    state->direct = 1;
155    return 0;
156}
157
158/* Decompress from input to the provided next_out and avail_out in the state.
159   On return, state->x.have and state->x.next point to the just decompressed
160   data.  If the gzip stream completes, state->how is reset to LOOK to look for
161   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
162   on success, -1 on failure. */
163static int gz_decomp(gz_state *state) {
164    int ret = Z_OK;
165    unsigned had;
166    PREFIX3(stream) *strm = &(state->strm);
167
168    /* fill output buffer up to end of deflate stream */
169    had = strm->avail_out;
170    do {
171        /* get more input for inflate() */
172        if (strm->avail_in == 0 && gz_avail(state) == -1)
173            return -1;
174        if (strm->avail_in == 0) {
175            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
176            break;
177        }
178
179        /* decompress and handle errors */
180        ret = PREFIX(inflate)(strm, Z_NO_FLUSH);
181        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
182            gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt");
183            return -1;
184        }
185        if (ret == Z_MEM_ERROR) {
186            gz_error(state, Z_MEM_ERROR, "out of memory");
187            return -1;
188        }
189        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
190            gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? "compressed data error" : strm->msg);
191            return -1;
192        }
193    } while (strm->avail_out && ret != Z_STREAM_END);
194
195    /* update available output */
196    state->x.have = had - strm->avail_out;
197    state->x.next = strm->next_out - state->x.have;
198
199    /* if the gzip stream completed successfully, look for another */
200    if (ret == Z_STREAM_END)
201        state->how = LOOK;
202
203    /* good decompression */
204    return 0;
205}
206
207/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
208   Data is either copied from the input file or decompressed from the input
209   file depending on state->how.  If state->how is LOOK, then a gzip header is
210   looked for to determine whether to copy or decompress.  Returns -1 on error,
211   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
212   end of the input file has been reached and all data has been processed.  */
213static int gz_fetch(gz_state *state) {
214    PREFIX3(stream) *strm = &(state->strm);
215
216    do {
217        switch (state->how) {
218        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
219            if (gz_look(state) == -1)
220                return -1;
221            if (state->how == LOOK)
222                return 0;
223            break;
224        case COPY:      /* -> COPY */
225            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
226                    == -1)
227                return -1;
228            state->x.next = state->out;
229            return 0;
230        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
231            strm->avail_out = state->size << 1;
232            strm->next_out = state->out;
233            if (gz_decomp(state) == -1)
234                return -1;
235        }
236    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
237    return 0;
238}
239
240/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
241static int gz_skip(gz_state *state, z_off64_t len) {
242    unsigned n;
243
244    /* skip over len bytes or reach end-of-file, whichever comes first */
245    while (len)
246        /* skip over whatever is in output buffer */
247        if (state->x.have) {
248            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
249                (unsigned)len : state->x.have;
250            state->x.have -= n;
251            state->x.next += n;
252            state->x.pos += n;
253            len -= n;
254        } else if (state->eof && state->strm.avail_in == 0) {
255            /* output buffer empty -- return if we're at the end of the input */
256            break;
257        } else {
258            /* need more data to skip -- load up output buffer */
259            /* get more output, looking for header if required */
260            if (gz_fetch(state) == -1)
261                return -1;
262        }
263    return 0;
264}
265
266/* Read len bytes into buf from file, or less than len up to the end of the
267   input.  Return the number of bytes read.  If zero is returned, either the
268   end of file was reached, or there was an error.  state->err must be
269   consulted in that case to determine which. */
270static size_t gz_read(gz_state *state, void *buf, size_t len) {
271    size_t got;
272    unsigned n;
273
274    /* if len is zero, avoid unnecessary operations */
275    if (len == 0)
276        return 0;
277
278    /* process a skip request */
279    if (state->seek) {
280        state->seek = 0;
281        if (gz_skip(state, state->skip) == -1)
282            return 0;
283    }
284
285    /* get len bytes to buf, or less than len if at the end */
286    got = 0;
287    do {
288        /* set n to the maximum amount of len that fits in an unsigned int */
289        n = (unsigned)-1;
290        if (n > len)
291            n = (unsigned)len;
292
293        /* first just try copying data from the output buffer */
294        if (state->x.have) {
295            if (state->x.have < n)
296                n = state->x.have;
297            memcpy(buf, state->x.next, n);
298            state->x.next += n;
299            state->x.have -= n;
300        }
301
302        /* output buffer empty -- return if we're at the end of the input */
303        else if (state->eof && state->strm.avail_in == 0) {
304            state->past = 1;        /* tried to read past end */
305            break;
306        }
307
308        /* need output data -- for small len or new stream load up our output
309           buffer */
310        else if (state->how == LOOK || n < (state->size << 1)) {
311            /* get more output, looking for header if required */
312            if (gz_fetch(state) == -1)
313                return 0;
314            continue;       /* no progress yet -- go back to copy above */
315            /* the copy above assures that we will leave with space in the
316               output buffer, allowing at least one gzungetc() to succeed */
317        }
318
319        /* large len -- read directly into user buffer */
320        else if (state->how == COPY) {      /* read directly */
321            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
322                return 0;
323        }
324
325        /* large len -- decompress directly into user buffer */
326        else {  /* state->how == GZIP */
327            state->strm.avail_out = n;
328            state->strm.next_out = (unsigned char *)buf;
329            if (gz_decomp(state) == -1)
330                return 0;
331            n = state->x.have;
332            state->x.have = 0;
333        }
334
335        /* update progress */
336        len -= n;
337        buf = (char *)buf + n;
338        got += n;
339        state->x.pos += n;
340    } while (len);
341
342    /* return number of bytes read into user buffer */
343    return got;
344}
345
346/* -- see zlib.h -- */
347int Z_EXPORT PREFIX(gzread)(gzFile file, void *buf, unsigned len) {
348    gz_state *state;
349
350    /* get internal structure */
351    if (file == NULL)
352        return -1;
353    state = (gz_state *)file;
354
355    /* check that we're reading and that there's no (serious) error */
356    if (state->mode != GZ_READ ||
357            (state->err != Z_OK && state->err != Z_BUF_ERROR))
358        return -1;
359
360    /* since an int is returned, make sure len fits in one, otherwise return
361       with an error (this avoids a flaw in the interface) */
362    if ((int)len < 0) {
363        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
364        return -1;
365    }
366
367    /* read len or fewer bytes to buf */
368    len = (unsigned)gz_read(state, buf, len);
369
370    /* check for an error */
371    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
372        return -1;
373
374    /* return the number of bytes read (this is assured to fit in an int) */
375    return (int)len;
376}
377
378/* -- see zlib.h -- */
379size_t Z_EXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) {
380    size_t len;
381    gz_state *state;
382
383    /* Exit early if size is zero, also prevents potential division by zero */
384    if (size == 0)
385        return 0;
386
387    /* get internal structure */
388    if (file == NULL)
389        return 0;
390    state = (gz_state *)file;
391
392    /* check that we're reading and that there's no (serious) error */
393    if (state->mode != GZ_READ ||
394            (state->err != Z_OK && state->err != Z_BUF_ERROR))
395        return 0;
396
397    /* compute bytes to read -- error on overflow */
398    if (size && SIZE_MAX / size < nitems) {
399        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
400        return 0;
401    }
402    len = nitems * size;
403
404    /* read len or fewer bytes to buf, return the number of full items read */
405    return len ? gz_read(state, buf, len) / size : 0;
406}
407
408/* -- see zlib.h -- */
409#undef @ZLIB_SYMBOL_PREFIX@gzgetc
410#undef @ZLIB_SYMBOL_PREFIX@zng_gzgetc
411int Z_EXPORT PREFIX(gzgetc)(gzFile file) {
412    unsigned char buf[1];
413    gz_state *state;
414
415    /* get internal structure */
416    if (file == NULL)
417        return -1;
418    state = (gz_state *)file;
419
420    /* check that we're reading and that there's no (serious) error */
421    if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
422        return -1;
423
424    /* try output buffer (no need to check for skip request) */
425    if (state->x.have) {
426        state->x.have--;
427        state->x.pos++;
428        return *(state->x.next)++;
429    }
430
431    /* nothing there -- try gz_read() */
432    return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
433}
434
/* Alternate entry point that simply forwards to gzgetc() and returns its
   result unchanged. */
int Z_EXPORT PREFIX(gzgetc_)(gzFile file) {
    return PREFIX(gzgetc)(file);
}
438
439/* -- see zlib.h -- */
440int Z_EXPORT PREFIX(gzungetc)(int c, gzFile file) {
441    gz_state *state;
442
443    /* get internal structure */
444    if (file == NULL)
445        return -1;
446    state = (gz_state *)file;
447
448    /* check that we're reading and that there's no (serious) error */
449    if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
450        return -1;
451
452    /* process a skip request */
453    if (state->seek) {
454        state->seek = 0;
455        if (gz_skip(state, state->skip) == -1)
456            return -1;
457    }
458
459    /* can't push EOF */
460    if (c < 0)
461        return -1;
462
463    /* if output buffer empty, put byte at end (allows more pushing) */
464    if (state->x.have == 0) {
465        state->x.have = 1;
466        state->x.next = state->out + (state->size << 1) - 1;
467        state->x.next[0] = (unsigned char)c;
468        state->x.pos--;
469        state->past = 0;
470        return c;
471    }
472
473    /* if no room, give up (must have already done a gzungetc()) */
474    if (state->x.have == (state->size << 1)) {
475        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
476        return -1;
477    }
478
479    /* slide output data if needed and insert byte before existing data */
480    if (state->x.next == state->out) {
481        unsigned char *src = state->out + state->x.have;
482        unsigned char *dest = state->out + (state->size << 1);
483        while (src > state->out)
484            *--dest = *--src;
485        state->x.next = dest;
486    }
487    state->x.have++;
488    state->x.next--;
489    state->x.next[0] = (unsigned char)c;
490    state->x.pos--;
491    state->past = 0;
492    return c;
493}
494
495/* -- see zlib.h -- */
496char * Z_EXPORT PREFIX(gzgets)(gzFile file, char *buf, int len) {
497    unsigned left, n;
498    char *str;
499    unsigned char *eol;
500    gz_state *state;
501
502    /* check parameters and get internal structure */
503    if (file == NULL || buf == NULL || len < 1)
504        return NULL;
505    state = (gz_state *)file;
506
507    /* check that we're reading and that there's no (serious) error */
508    if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
509        return NULL;
510
511    /* process a skip request */
512    if (state->seek) {
513        state->seek = 0;
514        if (gz_skip(state, state->skip) == -1)
515            return NULL;
516    }
517
518    /* copy output bytes up to new line or len - 1, whichever comes first --
519       append a terminating zero to the string (we don't check for a zero in
520       the contents, let the user worry about that) */
521    str = buf;
522    left = (unsigned)len - 1;
523    if (left) {
524        do {
525            /* assure that something is in the output buffer */
526            if (state->x.have == 0 && gz_fetch(state) == -1)
527                return NULL;                /* error */
528            if (state->x.have == 0) {       /* end of file */
529                state->past = 1;            /* read past end */
530                break;                      /* return what we have */
531            }
532
533            /* look for end-of-line in current output buffer */
534            n = state->x.have > left ? left : state->x.have;
535            eol = (unsigned char *)memchr(state->x.next, '\n', n);
536            if (eol != NULL)
537                n = (unsigned)(eol - state->x.next) + 1;
538
539            /* copy through end-of-line, or remainder if not found */
540            memcpy(buf, state->x.next, n);
541            state->x.have -= n;
542            state->x.next += n;
543            state->x.pos += n;
544            left -= n;
545            buf += n;
546        } while (left && eol == NULL);
547    }
548
549    /* return terminated string, or if nothing, end of file */
550    if (buf == str)
551        return NULL;
552    buf[0] = 0;
553    return str;
554}
555
556/* -- see zlib.h -- */
557int Z_EXPORT PREFIX(gzdirect)(gzFile file) {
558    gz_state *state;
559
560    /* get internal structure */
561    if (file == NULL)
562        return 0;
563
564    state = (gz_state *)file;
565
566    /* if the state is not known, but we can find out, then do so (this is
567       mainly for right after a gzopen() or gzdopen()) */
568    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
569        (void)gz_look(state);
570
571    /* return 1 if transparent, 0 if processing a gzip stream */
572    return state->direct;
573}
574
575/* -- see zlib.h -- */
576int Z_EXPORT PREFIX(gzclose_r)(gzFile file) {
577    int ret, err;
578    gz_state *state;
579
580    /* get internal structure */
581    if (file == NULL)
582        return Z_STREAM_ERROR;
583
584    state = (gz_state *)file;
585
586    /* check that we're reading */
587    if (state->mode != GZ_READ)
588        return Z_STREAM_ERROR;
589
590    /* free memory and close file */
591    if (state->size) {
592        PREFIX(inflateEnd)(&(state->strm));
593        zng_free(state->out);
594        zng_free(state->in);
595    }
596    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
597    gz_error(state, Z_OK, NULL);
598    free(state->path);
599    ret = close(state->fd);
600    zng_free(state);
601    return ret ? Z_ERRNO : err;
602}
603