1/* gzread.c -- zlib functions for reading gzip files 2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6#include "zbuild.h" 7#include "zutil_p.h" 8#include "gzguts.h" 9 10/* Local functions */ 11static int gz_load(gz_state *, unsigned char *, unsigned, unsigned *); 12static int gz_avail(gz_state *); 13static int gz_look(gz_state *); 14static int gz_decomp(gz_state *); 15static int gz_fetch(gz_state *); 16static int gz_skip(gz_state *, z_off64_t); 17static size_t gz_read(gz_state *, void *, size_t); 18 19/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from 20 state->fd, and update state->eof, state->err, and state->msg as appropriate. 21 This function needs to loop on read(), since read() is not guaranteed to 22 read the number of bytes requested, depending on the type of descriptor. */ 23static int gz_load(gz_state *state, unsigned char *buf, unsigned len, unsigned *have) { 24 ssize_t ret; 25 26 *have = 0; 27 do { 28 ret = read(state->fd, buf + *have, len - *have); 29 if (ret <= 0) 30 break; 31 *have += (unsigned)ret; 32 } while (*have < len); 33 if (ret < 0) { 34 gz_error(state, Z_ERRNO, zstrerror()); 35 return -1; 36 } 37 if (ret == 0) 38 state->eof = 1; 39 return 0; 40} 41 42/* Load up input buffer and set eof flag if last data loaded -- return -1 on 43 error, 0 otherwise. Note that the eof flag is set when the end of the input 44 file is reached, even though there may be unused data in the buffer. Once 45 that data has been used, no more attempts will be made to read the file. 46 If strm->avail_in != 0, then the current data is moved to the beginning of 47 the input buffer, and then the remainder of the buffer is loaded with the 48 available data from the input file. */ 49static int gz_avail(gz_state *state) { 50 unsigned got; 51 PREFIX3(stream) *strm = &(state->strm); 52 53 if (state->err != Z_OK && state->err != Z_BUF_ERROR) 54 return -1; 55 if (state->eof == 0) { 56 if (strm->avail_in) { /* copy what's there to the start */ 57 unsigned char *p = state->in; 58 unsigned const char *q = strm->next_in; 59 unsigned n = strm->avail_in; 60 do { 61 *p++ = *q++; 62 } while (--n); 63 } 64 if (gz_load(state, state->in + strm->avail_in, state->size - strm->avail_in, &got) == -1) 65 return -1; 66 strm->avail_in += got; 67 strm->next_in = state->in; 68 } 69 return 0; 70} 71 72/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. 73 If this is the first time in, allocate required memory. state->how will be 74 left unchanged if there is no more input data available, will be set to COPY 75 if there is no gzip header and direct copying will be performed, or it will 76 be set to GZIP for decompression. If direct copying, then leftover input 77 data from the input buffer will be copied to the output buffer. In that 78 case, all further file reads will be directly to either the output buffer or 79 a user buffer. If decompressing, the inflate state will be initialized. 80 gz_look() will return 0 on success or -1 on failure. */ 81static int gz_look(gz_state *state) { 82 PREFIX3(stream) *strm = &(state->strm); 83 84 /* allocate read buffers and inflate memory */ 85 if (state->size == 0) { 86 /* allocate buffers */ 87 state->in = (unsigned char *)zng_alloc(state->want); 88 state->out = (unsigned char *)zng_alloc(state->want << 1); 89 if (state->in == NULL || state->out == NULL) { 90 zng_free(state->out); 91 zng_free(state->in); 92 gz_error(state, Z_MEM_ERROR, "out of memory"); 93 return -1; 94 } 95 state->size = state->want; 96 97 /* allocate inflate memory */ 98 state->strm.zalloc = NULL; 99 state->strm.zfree = NULL; 100 state->strm.opaque = NULL; 101 state->strm.avail_in = 0; 102 state->strm.next_in = NULL; 103 if (PREFIX(inflateInit2)(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ 104 zng_free(state->out); 105 zng_free(state->in); 106 state->size = 0; 107 gz_error(state, Z_MEM_ERROR, "out of memory"); 108 return -1; 109 } 110 } 111 112 /* get at least the magic bytes in the input buffer */ 113 if (strm->avail_in < 2) { 114 if (gz_avail(state) == -1) 115 return -1; 116 if (strm->avail_in == 0) 117 return 0; 118 } 119 120 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is 121 a logical dilemma here when considering the case of a partially written 122 gzip file, to wit, if a single 31 byte is written, then we cannot tell 123 whether this is a single-byte file, or just a partially written gzip 124 file -- for here we assume that if a gzip file is being written, then 125 the header will be written in a single operation, so that reading a 126 single byte is sufficient indication that it is not a gzip file) */ 127 if (strm->avail_in > 1 && 128 strm->next_in[0] == 31 && strm->next_in[1] == 139) { 129 PREFIX(inflateReset)(strm); 130 state->how = GZIP; 131 state->direct = 0; 132 return 0; 133 } 134 135 /* no gzip header -- if we were decoding gzip before, then this is trailing 136 garbage. Ignore the trailing garbage and finish. */ 137 if (state->direct == 0) { 138 strm->avail_in = 0; 139 state->eof = 1; 140 state->x.have = 0; 141 return 0; 142 } 143 144 /* doing raw i/o, copy any leftover input to output -- this assumes that 145 the output buffer is larger than the input buffer, which also assures 146 space for gzungetc() */ 147 state->x.next = state->out; 148 if (strm->avail_in) { 149 memcpy(state->x.next, strm->next_in, strm->avail_in); 150 state->x.have = strm->avail_in; 151 strm->avail_in = 0; 152 } 153 state->how = COPY; 154 state->direct = 1; 155 return 0; 156} 157 158/* Decompress from input to the provided next_out and avail_out in the state. 159 On return, state->x.have and state->x.next point to the just decompressed 160 data. If the gzip stream completes, state->how is reset to LOOK to look for 161 the next gzip stream or raw data, once state->x.have is depleted. Returns 0 162 on success, -1 on failure. */ 163static int gz_decomp(gz_state *state) { 164 int ret = Z_OK; 165 unsigned had; 166 PREFIX3(stream) *strm = &(state->strm); 167 168 /* fill output buffer up to end of deflate stream */ 169 had = strm->avail_out; 170 do { 171 /* get more input for inflate() */ 172 if (strm->avail_in == 0 && gz_avail(state) == -1) 173 return -1; 174 if (strm->avail_in == 0) { 175 gz_error(state, Z_BUF_ERROR, "unexpected end of file"); 176 break; 177 } 178 179 /* decompress and handle errors */ 180 ret = PREFIX(inflate)(strm, Z_NO_FLUSH); 181 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 182 gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt"); 183 return -1; 184 } 185 if (ret == Z_MEM_ERROR) { 186 gz_error(state, Z_MEM_ERROR, "out of memory"); 187 return -1; 188 } 189 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 190 gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? "compressed data error" : strm->msg); 191 return -1; 192 } 193 } while (strm->avail_out && ret != Z_STREAM_END); 194 195 /* update available output */ 196 state->x.have = had - strm->avail_out; 197 state->x.next = strm->next_out - state->x.have; 198 199 /* if the gzip stream completed successfully, look for another */ 200 if (ret == Z_STREAM_END) 201 state->how = LOOK; 202 203 /* good decompression */ 204 return 0; 205} 206 207/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. 208 Data is either copied from the input file or decompressed from the input 209 file depending on state->how. If state->how is LOOK, then a gzip header is 210 looked for to determine whether to copy or decompress. Returns -1 on error, 211 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the 212 end of the input file has been reached and all data has been processed. */ 213static int gz_fetch(gz_state *state) { 214 PREFIX3(stream) *strm = &(state->strm); 215 216 do { 217 switch (state->how) { 218 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ 219 if (gz_look(state) == -1) 220 return -1; 221 if (state->how == LOOK) 222 return 0; 223 break; 224 case COPY: /* -> COPY */ 225 if (gz_load(state, state->out, state->size << 1, &(state->x.have)) 226 == -1) 227 return -1; 228 state->x.next = state->out; 229 return 0; 230 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ 231 strm->avail_out = state->size << 1; 232 strm->next_out = state->out; 233 if (gz_decomp(state) == -1) 234 return -1; 235 } 236 } while (state->x.have == 0 && (!state->eof || strm->avail_in)); 237 return 0; 238} 239 240/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ 241static int gz_skip(gz_state *state, z_off64_t len) { 242 unsigned n; 243 244 /* skip over len bytes or reach end-of-file, whichever comes first */ 245 while (len) 246 /* skip over whatever is in output buffer */ 247 if (state->x.have) { 248 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? 249 (unsigned)len : state->x.have; 250 state->x.have -= n; 251 state->x.next += n; 252 state->x.pos += n; 253 len -= n; 254 } else if (state->eof && state->strm.avail_in == 0) { 255 /* output buffer empty -- return if we're at the end of the input */ 256 break; 257 } else { 258 /* need more data to skip -- load up output buffer */ 259 /* get more output, looking for header if required */ 260 if (gz_fetch(state) == -1) 261 return -1; 262 } 263 return 0; 264} 265 266/* Read len bytes into buf from file, or less than len up to the end of the 267 input. Return the number of bytes read. If zero is returned, either the 268 end of file was reached, or there was an error. state->err must be 269 consulted in that case to determine which. */ 270static size_t gz_read(gz_state *state, void *buf, size_t len) { 271 size_t got; 272 unsigned n; 273 274 /* if len is zero, avoid unnecessary operations */ 275 if (len == 0) 276 return 0; 277 278 /* process a skip request */ 279 if (state->seek) { 280 state->seek = 0; 281 if (gz_skip(state, state->skip) == -1) 282 return 0; 283 } 284 285 /* get len bytes to buf, or less than len if at the end */ 286 got = 0; 287 do { 288 /* set n to the maximum amount of len that fits in an unsigned int */ 289 n = (unsigned)-1; 290 if (n > len) 291 n = (unsigned)len; 292 293 /* first just try copying data from the output buffer */ 294 if (state->x.have) { 295 if (state->x.have < n) 296 n = state->x.have; 297 memcpy(buf, state->x.next, n); 298 state->x.next += n; 299 state->x.have -= n; 300 } 301 302 /* output buffer empty -- return if we're at the end of the input */ 303 else if (state->eof && state->strm.avail_in == 0) { 304 state->past = 1; /* tried to read past end */ 305 break; 306 } 307 308 /* need output data -- for small len or new stream load up our output 309 buffer */ 310 else if (state->how == LOOK || n < (state->size << 1)) { 311 /* get more output, looking for header if required */ 312 if (gz_fetch(state) == -1) 313 return 0; 314 continue; /* no progress yet -- go back to copy above */ 315 /* the copy above assures that we will leave with space in the 316 output buffer, allowing at least one gzungetc() to succeed */ 317 } 318 319 /* large len -- read directly into user buffer */ 320 else if (state->how == COPY) { /* read directly */ 321 if (gz_load(state, (unsigned char *)buf, n, &n) == -1) 322 return 0; 323 } 324 325 /* large len -- decompress directly into user buffer */ 326 else { /* state->how == GZIP */ 327 state->strm.avail_out = n; 328 state->strm.next_out = (unsigned char *)buf; 329 if (gz_decomp(state) == -1) 330 return 0; 331 n = state->x.have; 332 state->x.have = 0; 333 } 334 335 /* update progress */ 336 len -= n; 337 buf = (char *)buf + n; 338 got += n; 339 state->x.pos += n; 340 } while (len); 341 342 /* return number of bytes read into user buffer */ 343 return got; 344} 345 346/* -- see zlib.h -- */ 347int Z_EXPORT PREFIX(gzread)(gzFile file, void *buf, unsigned len) { 348 gz_state *state; 349 350 /* get internal structure */ 351 if (file == NULL) 352 return -1; 353 state = (gz_state *)file; 354 355 /* check that we're reading and that there's no (serious) error */ 356 if (state->mode != GZ_READ || 357 (state->err != Z_OK && state->err != Z_BUF_ERROR)) 358 return -1; 359 360 /* since an int is returned, make sure len fits in one, otherwise return 361 with an error (this avoids a flaw in the interface) */ 362 if ((int)len < 0) { 363 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); 364 return -1; 365 } 366 367 /* read len or fewer bytes to buf */ 368 len = (unsigned)gz_read(state, buf, len); 369 370 /* check for an error */ 371 if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) 372 return -1; 373 374 /* return the number of bytes read (this is assured to fit in an int) */ 375 return (int)len; 376} 377 378/* -- see zlib.h -- */ 379size_t Z_EXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) { 380 size_t len; 381 gz_state *state; 382 383 /* Exit early if size is zero, also prevents potential division by zero */ 384 if (size == 0) 385 return 0; 386 387 /* get internal structure */ 388 if (file == NULL) 389 return 0; 390 state = (gz_state *)file; 391 392 /* check that we're reading and that there's no (serious) error */ 393 if (state->mode != GZ_READ || 394 (state->err != Z_OK && state->err != Z_BUF_ERROR)) 395 return 0; 396 397 /* compute bytes to read -- error on overflow */ 398 if (size && SIZE_MAX / size < nitems) { 399 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); 400 return 0; 401 } 402 len = nitems * size; 403 404 /* read len or fewer bytes to buf, return the number of full items read */ 405 return len ? gz_read(state, buf, len) / size : 0; 406} 407 408/* -- see zlib.h -- */ 409#undef @ZLIB_SYMBOL_PREFIX@gzgetc 410#undef @ZLIB_SYMBOL_PREFIX@zng_gzgetc 411int Z_EXPORT PREFIX(gzgetc)(gzFile file) { 412 unsigned char buf[1]; 413 gz_state *state; 414 415 /* get internal structure */ 416 if (file == NULL) 417 return -1; 418 state = (gz_state *)file; 419 420 /* check that we're reading and that there's no (serious) error */ 421 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) 422 return -1; 423 424 /* try output buffer (no need to check for skip request) */ 425 if (state->x.have) { 426 state->x.have--; 427 state->x.pos++; 428 return *(state->x.next)++; 429 } 430 431 /* nothing there -- try gz_read() */ 432 return gz_read(state, buf, 1) < 1 ? -1 : buf[0]; 433} 434 435int Z_EXPORT PREFIX(gzgetc_)(gzFile file) { 436 return PREFIX(gzgetc)(file); 437} 438 439/* -- see zlib.h -- */ 440int Z_EXPORT PREFIX(gzungetc)(int c, gzFile file) { 441 gz_state *state; 442 443 /* get internal structure */ 444 if (file == NULL) 445 return -1; 446 state = (gz_state *)file; 447 448 /* check that we're reading and that there's no (serious) error */ 449 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) 450 return -1; 451 452 /* process a skip request */ 453 if (state->seek) { 454 state->seek = 0; 455 if (gz_skip(state, state->skip) == -1) 456 return -1; 457 } 458 459 /* can't push EOF */ 460 if (c < 0) 461 return -1; 462 463 /* if output buffer empty, put byte at end (allows more pushing) */ 464 if (state->x.have == 0) { 465 state->x.have = 1; 466 state->x.next = state->out + (state->size << 1) - 1; 467 state->x.next[0] = (unsigned char)c; 468 state->x.pos--; 469 state->past = 0; 470 return c; 471 } 472 473 /* if no room, give up (must have already done a gzungetc()) */ 474 if (state->x.have == (state->size << 1)) { 475 gz_error(state, Z_DATA_ERROR, "out of room to push characters"); 476 return -1; 477 } 478 479 /* slide output data if needed and insert byte before existing data */ 480 if (state->x.next == state->out) { 481 unsigned char *src = state->out + state->x.have; 482 unsigned char *dest = state->out + (state->size << 1); 483 while (src > state->out) 484 *--dest = *--src; 485 state->x.next = dest; 486 } 487 state->x.have++; 488 state->x.next--; 489 state->x.next[0] = (unsigned char)c; 490 state->x.pos--; 491 state->past = 0; 492 return c; 493} 494 495/* -- see zlib.h -- */ 496char * Z_EXPORT PREFIX(gzgets)(gzFile file, char *buf, int len) { 497 unsigned left, n; 498 char *str; 499 unsigned char *eol; 500 gz_state *state; 501 502 /* check parameters and get internal structure */ 503 if (file == NULL || buf == NULL || len < 1) 504 return NULL; 505 state = (gz_state *)file; 506 507 /* check that we're reading and that there's no (serious) error */ 508 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) 509 return NULL; 510 511 /* process a skip request */ 512 if (state->seek) { 513 state->seek = 0; 514 if (gz_skip(state, state->skip) == -1) 515 return NULL; 516 } 517 518 /* copy output bytes up to new line or len - 1, whichever comes first -- 519 append a terminating zero to the string (we don't check for a zero in 520 the contents, let the user worry about that) */ 521 str = buf; 522 left = (unsigned)len - 1; 523 if (left) { 524 do { 525 /* assure that something is in the output buffer */ 526 if (state->x.have == 0 && gz_fetch(state) == -1) 527 return NULL; /* error */ 528 if (state->x.have == 0) { /* end of file */ 529 state->past = 1; /* read past end */ 530 break; /* return what we have */ 531 } 532 533 /* look for end-of-line in current output buffer */ 534 n = state->x.have > left ? left : state->x.have; 535 eol = (unsigned char *)memchr(state->x.next, '\n', n); 536 if (eol != NULL) 537 n = (unsigned)(eol - state->x.next) + 1; 538 539 /* copy through end-of-line, or remainder if not found */ 540 memcpy(buf, state->x.next, n); 541 state->x.have -= n; 542 state->x.next += n; 543 state->x.pos += n; 544 left -= n; 545 buf += n; 546 } while (left && eol == NULL); 547 } 548 549 /* return terminated string, or if nothing, end of file */ 550 if (buf == str) 551 return NULL; 552 buf[0] = 0; 553 return str; 554} 555 556/* -- see zlib.h -- */ 557int Z_EXPORT PREFIX(gzdirect)(gzFile file) { 558 gz_state *state; 559 560 /* get internal structure */ 561 if (file == NULL) 562 return 0; 563 564 state = (gz_state *)file; 565 566 /* if the state is not known, but we can find out, then do so (this is 567 mainly for right after a gzopen() or gzdopen()) */ 568 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) 569 (void)gz_look(state); 570 571 /* return 1 if transparent, 0 if processing a gzip stream */ 572 return state->direct; 573} 574 575/* -- see zlib.h -- */ 576int Z_EXPORT PREFIX(gzclose_r)(gzFile file) { 577 int ret, err; 578 gz_state *state; 579 580 /* get internal structure */ 581 if (file == NULL) 582 return Z_STREAM_ERROR; 583 584 state = (gz_state *)file; 585 586 /* check that we're reading */ 587 if (state->mode != GZ_READ) 588 return Z_STREAM_ERROR; 589 590 /* free memory and close file */ 591 if (state->size) { 592 PREFIX(inflateEnd)(&(state->strm)); 593 zng_free(state->out); 594 zng_free(state->in); 595 } 596 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; 597 gz_error(state, Z_OK, NULL); 598 free(state->path); 599 ret = close(state->fd); 600 zng_free(state); 601 return ret ? Z_ERRNO : err; 602} 603