1 use std::ffi::CString;
2 use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3 use std::time;
4 
5 use crate::bufreader::BufReader;
6 use crate::{Compression, Crc};
7 
/// Header flag bit: a two-byte CRC of the header follows the optional fields.
pub static FHCRC: u8 = 1 << 1;
/// Header flag bit: a length-prefixed `extra` field is present.
pub static FEXTRA: u8 = 1 << 2;
/// Header flag bit: a nul-terminated file name is present.
pub static FNAME: u8 = 1 << 3;
/// Header flag bit: a nul-terminated comment is present.
pub static FCOMMENT: u8 = 1 << 4;
/// Mask of flag bits 5 through 7; the header parser rejects a header that
/// has any of them set.
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
13 
14 pub mod bufread;
15 pub mod read;
16 pub mod write;
17 
// The maximum number of bytes (not counting the terminating nul) accepted
// for the header filename and comment fields. More than enough for these
// fields in reasonable use, but it stops a hostile stream from making the
// parser buffer an unbounded amount of data.
const MAX_HEADER_BUF: usize = 65535;
21 
/// The metadata carried by the header of a gzip stream.
///
/// The optional fields describe the file that was compressed and are `None`
/// when the header did not include them.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    extra: Option<Vec<u8>>,
    filename: Option<Vec<u8>>,
    comment: Option<Vec<u8>>,
    operating_system: u8,
    mtime: u32,
}

impl GzHeader {
    /// The raw bytes of the header's `filename` field, if one was present.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// The raw bytes of the header's `extra` field, if one was present.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// The raw bytes of the header's `comment` field, if one was present.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// The header's `operating_system` byte.
    ///
    /// Predefined values exist for various operating systems; 255 means the
    /// operating system is unknown.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// The most recent modification time of the original file.
    ///
    /// The value is in Unix format: seconds since 00:00:00 GMT, Jan. 1, 1970
    /// (which may cause problems for MS-DOS and other systems that use local
    /// rather than Universal time). If the compressed data did not come from
    /// a file, `mtime` is the time at which compression started. A value of
    /// 0 means no time stamp is available.
    ///
    /// Relying on `mtime` is discouraged because of the Year 2038 problem.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// The modification time as a [`SystemTime`](time::SystemTime).
    ///
    /// Returns `None` when the stored counter is 0, i.e. when no time stamp
    /// is available. Otherwise the counter is interpreted as seconds since
    /// 00:00:00 GMT, Jan. 1 1970; see [`mtime`](#method.mtime) for details.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => Some(time::UNIX_EPOCH + time::Duration::from_secs(u64::from(secs))),
        }
    }
}
89 
/// The section of the gzip header that `GzHeaderParser::parse` reads next.
///
/// Wherever a variant holds an `Option<Box<Crc>>`, it is the running checksum
/// of the header bytes consumed so far; it is `Some` only when the header's
/// `FHCRC` flag is set.
#[derive(Debug)]
pub enum GzHeaderState {
    /// The fixed 10-byte start of the header: the number of bytes already
    /// stored, and the buffer they are stored in.
    Start(u8, [u8; 10]),
    /// The 2-byte length of the `extra` field: checksum, number of bytes
    /// already stored, and the buffer they are stored in.
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
    /// The body of the `extra` field: checksum and the number of bytes
    /// already stored in the header's `extra` vector.
    Extra(Option<Box<Crc>>, u16),
    /// The nul-terminated file name.
    Filename(Option<Box<Crc>>),
    /// The nul-terminated comment.
    Comment(Option<Box<Crc>>),
    /// The 2-byte stored header checksum: the computed checksum to compare
    /// against, the number of bytes already stored, and their buffer.
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
    /// Nothing is left to read.
    Complete,
}
100 
101 impl Default for GzHeaderState {
default() -> Self102     fn default() -> Self {
103         Self::Complete
104     }
105 }
106 
/// Incremental parser for a gzip header.
///
/// The parser records how far it has got in `state`, so `parse` can return
/// early on a read error and be called again later to continue.
#[derive(Debug, Default)]
pub struct GzHeaderParser {
    // Which section of the header is read next, plus any partially read bytes.
    state: GzHeaderState,
    // The flag byte of the header; zero until the fixed part has been read.
    flags: u8,
    // The header fields decoded so far.
    header: GzHeader,
}
113 
114 impl GzHeaderParser {
new() -> Self115     fn new() -> Self {
116         GzHeaderParser {
117             state: GzHeaderState::Start(0, [0; 10]),
118             flags: 0,
119             header: GzHeader::default(),
120         }
121     }
122 
    /// Reads and validates a gzip header from `r`, continuing from
    /// `self.state`.
    ///
    /// Progress is kept in `self.state` (and in the partially filled fields
    /// of `self.header`), so when a read fails the call can be repeated and
    /// carries on after the bytes already consumed rather than starting over.
    /// Returns `Ok(())` once the state is `Complete`.
    ///
    /// # Errors
    ///
    /// * `bad_header()` if the identification bytes, the compression method
    ///   or the reserved flag bits are invalid.
    /// * `corrupt()` if the header carries a CRC that does not match the
    ///   bytes that were read.
    /// * `UnexpectedEof` (raised by `read_into` / `read_to_nul`) if `r` ends
    ///   before the header does.
    /// * Any other I/O error reported while reading from `r`.
    fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
        // Each arm finishes one section and then stores the next state; the
        // loop runs until `Complete` is reached or an error is returned.
        loop {
            match &mut self.state {
                GzHeaderState::Start(count, buffer) => {
                    // Fill the 10-byte fixed header; `count` survives an
                    // early return because it lives inside `self.state`.
                    while (*count as usize) < buffer.len() {
                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                    }
                    // Gzip identification bytes
                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
                        return Err(bad_header());
                    }
                    // Gzip compression method (8 = deflate)
                    if buffer[2] != 8 {
                        return Err(bad_header());
                    }
                    self.flags = buffer[3];
                    // RFC1952: "must give an error indication if any reserved bit is non-zero"
                    if self.flags & FRESERVED != 0 {
                        return Err(bad_header());
                    }
                    // Bytes 4..8 hold the modification time, least
                    // significant byte first.
                    self.header.mtime = ((buffer[4] as u32) << 0)
                        | ((buffer[5] as u32) << 8)
                        | ((buffer[6] as u32) << 16)
                        | ((buffer[7] as u32) << 24);
                    // Byte 8 is read but not stored anywhere.
                    let _xfl = buffer[8];
                    self.header.operating_system = buffer[9];
                    // Only start a checksum when the header says one is
                    // stored; it has to cover these first ten bytes too.
                    let crc = if self.flags & FHCRC != 0 {
                        let mut crc = Box::new(Crc::new());
                        crc.update(buffer);
                        Some(crc)
                    } else {
                        None
                    };
                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
                }
                GzHeaderState::Xlen(crc, count, buffer) => {
                    if self.flags & FEXTRA != 0 {
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        if let Some(crc) = crc {
                            crc.update(buffer);
                        }
                        let xlen = parse_le_u16(&buffer);
                        // Allocate the whole field up front; the `Extra`
                        // state fills it in place.
                        self.header.extra = Some(vec![0; xlen as usize]);
                        self.state = GzHeaderState::Extra(crc.take(), 0);
                    } else {
                        // No extra field: go straight to the file name.
                        self.state = GzHeaderState::Filename(crc.take());
                    }
                }
                GzHeaderState::Extra(crc, count) => {
                    debug_assert!(self.header.extra.is_some());
                    let extra = self.header.extra.as_mut().unwrap();
                    while (*count as usize) < extra.len() {
                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
                    }
                    if let Some(crc) = crc {
                        crc.update(extra);
                    }
                    self.state = GzHeaderState::Filename(crc.take());
                }
                GzHeaderState::Filename(crc) => {
                    if self.flags & FNAME != 0 {
                        // `get_or_insert_with` keeps any bytes collected by
                        // an earlier call that returned before the nul.
                        let filename = self.header.filename.get_or_insert_with(Vec::new);
                        read_to_nul(r, filename)?;
                        if let Some(crc) = crc {
                            crc.update(filename);
                            // The terminator is part of the header but is
                            // not stored in `filename`.
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Comment(crc.take());
                }
                GzHeaderState::Comment(crc) => {
                    if self.flags & FCOMMENT != 0 {
                        // Same handling as the file name above.
                        let comment = self.header.comment.get_or_insert_with(Vec::new);
                        read_to_nul(r, comment)?;
                        if let Some(crc) = crc {
                            crc.update(comment);
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
                }
                GzHeaderState::Crc(crc, count, buffer) => {
                    // `crc` is `Some` exactly when FHCRC was set, so the two
                    // stored checksum bytes are only read in that case.
                    if let Some(crc) = crc {
                        debug_assert!(self.flags & FHCRC != 0);
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        let stored_crc = parse_le_u16(&buffer);
                        // The header stores only the low 16 bits of the sum.
                        let calced_crc = crc.sum() as u16;
                        if stored_crc != calced_crc {
                            return Err(corrupt());
                        }
                    }
                    self.state = GzHeaderState::Complete;
                }
                GzHeaderState::Complete => {
                    return Ok(());
                }
            }
        }
    }
226 
header(&self) -> Option<&GzHeader>227     fn header(&self) -> Option<&GzHeader> {
228         match self.state {
229             GzHeaderState::Complete => Some(&self.header),
230             _ => None,
231         }
232     }
233 }
234 
235 impl From<GzHeaderParser> for GzHeader {
from(parser: GzHeaderParser) -> Self236     fn from(parser: GzHeaderParser) -> Self {
237         debug_assert!(matches!(parser.state, GzHeaderState::Complete));
238         parser.header
239     }
240 }
241 
// Perform a single read from `r` into `buffer` and report how many bytes
// arrived. Unlike `Read::read`, hitting EOF (a read of zero bytes) is an
// `UnexpectedEof` error, and `Ok(0)` means the read was interrupted and
// should simply be tried again. `buffer` must not be empty.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    match r.read(buffer) {
        Ok(n) if n > 0 => Ok(n),
        Ok(_) => Err(Error::from(ErrorKind::UnexpectedEof)),
        Err(e) => {
            if e.kind() == ErrorKind::Interrupted {
                Ok(0)
            } else {
                Err(e)
            }
        }
    }
}
254 
255 // Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()>256 fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
257     let mut bytes = r.bytes();
258     loop {
259         match bytes.next().transpose()? {
260             Some(byte) if byte == 0 => {
261                 return Ok(());
262             }
263             Some(_) if buffer.len() == MAX_HEADER_BUF => {
264                 return Err(Error::new(
265                     ErrorKind::InvalidInput,
266                     "gzip header field too long",
267                 ));
268             }
269             Some(byte) => {
270                 buffer.push(byte);
271             }
272             None => {
273                 return Err(ErrorKind::UnexpectedEof.into());
274             }
275         }
276     }
277 }
278 
// Decode two bytes as a little-endian `u16` (least significant byte first).
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    u16::from_le_bytes(*buffer)
}
282 
// The error returned when the fixed part of a gzip header fails validation.
fn bad_header() -> Error {
    let message = "invalid gzip header";
    Error::new(ErrorKind::InvalidInput, message)
}
286 
// The error returned when a stored checksum disagrees with the computed one.
fn corrupt() -> Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, message)
}
293 
/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
///                 .filename("hello_world.txt")
///                 .comment("test file, please delete")
///                 .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct GzBuilder {
    // Bytes of the optional `extra` field; written after a two-byte length.
    extra: Option<Vec<u8>>,
    // Optional file name; stored as a `CString`, so it has no interior nul.
    filename: Option<CString>,
    // Optional comment; stored as a `CString`, so it has no interior nul.
    comment: Option<CString>,
    // Operating-system byte; 255 is written when this is `None`.
    operating_system: Option<u8>,
    // Modification time written to the header.
    mtime: u32,
}
328 
329 impl Default for GzBuilder {
default() -> Self330     fn default() -> Self {
331         Self::new()
332     }
333 }
334 
335 impl GzBuilder {
336     /// Create a new blank builder with no header by default.
new() -> GzBuilder337     pub fn new() -> GzBuilder {
338         GzBuilder {
339             extra: None,
340             filename: None,
341             comment: None,
342             operating_system: None,
343             mtime: 0,
344         }
345     }
346 
347     /// Configure the `mtime` field in the gzip header.
mtime(mut self, mtime: u32) -> GzBuilder348     pub fn mtime(mut self, mtime: u32) -> GzBuilder {
349         self.mtime = mtime;
350         self
351     }
352 
353     /// Configure the `operating_system` field in the gzip header.
operating_system(mut self, os: u8) -> GzBuilder354     pub fn operating_system(mut self, os: u8) -> GzBuilder {
355         self.operating_system = Some(os);
356         self
357     }
358 
359     /// Configure the `extra` field in the gzip header.
extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder360     pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
361         self.extra = Some(extra.into());
362         self
363     }
364 
365     /// Configure the `filename` field in the gzip header.
366     ///
367     /// # Panics
368     ///
369     /// Panics if the `filename` slice contains a zero.
filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder370     pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
371         self.filename = Some(CString::new(filename.into()).unwrap());
372         self
373     }
374 
375     /// Configure the `comment` field in the gzip header.
376     ///
377     /// # Panics
378     ///
379     /// Panics if the `comment` slice contains a zero.
comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder380     pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
381         self.comment = Some(CString::new(comment.into()).unwrap());
382         self
383     }
384 
385     /// Consume this builder, creating a writer encoder in the process.
386     ///
387     /// The data written to the returned encoder will be compressed and then
388     /// written out to the supplied parameter `w`.
write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W>389     pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
390         write::gz_encoder(self.into_header(lvl), w, lvl)
391     }
392 
393     /// Consume this builder, creating a reader encoder in the process.
394     ///
395     /// Data read from the returned encoder will be the compressed version of
396     /// the data read from the given reader.
read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R>397     pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
398         read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
399     }
400 
401     /// Consume this builder, creating a reader encoder in the process.
402     ///
403     /// Data read from the returned encoder will be the compressed version of
404     /// the data read from the given reader.
buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R> where R: BufRead,405     pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
406     where
407         R: BufRead,
408     {
409         bufread::gz_encoder(self.into_header(lvl), r, lvl)
410     }
411 
into_header(self, lvl: Compression) -> Vec<u8>412     fn into_header(self, lvl: Compression) -> Vec<u8> {
413         let GzBuilder {
414             extra,
415             filename,
416             comment,
417             operating_system,
418             mtime,
419         } = self;
420         let mut flg = 0;
421         let mut header = vec![0u8; 10];
422         if let Some(v) = extra {
423             flg |= FEXTRA;
424             header.push((v.len() >> 0) as u8);
425             header.push((v.len() >> 8) as u8);
426             header.extend(v);
427         }
428         if let Some(filename) = filename {
429             flg |= FNAME;
430             header.extend(filename.as_bytes_with_nul().iter().copied());
431         }
432         if let Some(comment) = comment {
433             flg |= FCOMMENT;
434             header.extend(comment.as_bytes_with_nul().iter().copied());
435         }
436         header[0] = 0x1f;
437         header[1] = 0x8b;
438         header[2] = 8;
439         header[3] = flg;
440         header[4] = (mtime >> 0) as u8;
441         header[5] = (mtime >> 8) as u8;
442         header[6] = (mtime >> 16) as u8;
443         header[7] = (mtime >> 24) as u8;
444         header[8] = if lvl.0 >= Compression::best().0 {
445             2
446         } else if lvl.0 <= Compression::fast().0 {
447             4
448         } else {
449             0
450         };
451 
452         // Typically this byte indicates what OS the gz stream was created on,
453         // but in an effort to have cross-platform reproducible streams just
454         // default this value to 255. I'm not sure that if we "correctly" set
455         // this it'd do anything anyway...
456         header[9] = operating_system.unwrap_or(255);
457         header
458     }
459 }
460 
461 #[cfg(test)]
462 mod tests {
463     use std::io::prelude::*;
464 
465     use super::{read, write, GzBuilder, GzHeaderParser};
466     use crate::{Compression, GzHeader};
467     use rand::{thread_rng, Rng};
468 
469     #[test]
roundtrip()470     fn roundtrip() {
471         let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
472         e.write_all(b"foo bar baz").unwrap();
473         let inner = e.finish().unwrap();
474         let mut d = read::GzDecoder::new(&inner[..]);
475         let mut s = String::new();
476         d.read_to_string(&mut s).unwrap();
477         assert_eq!(s, "foo bar baz");
478     }
479 
480     #[test]
roundtrip_zero()481     fn roundtrip_zero() {
482         let e = write::GzEncoder::new(Vec::new(), Compression::default());
483         let inner = e.finish().unwrap();
484         let mut d = read::GzDecoder::new(&inner[..]);
485         let mut s = String::new();
486         d.read_to_string(&mut s).unwrap();
487         assert_eq!(s, "");
488     }
489 
490     #[test]
roundtrip_big()491     fn roundtrip_big() {
492         let mut real = Vec::new();
493         let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
494         let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
495         for _ in 0..200 {
496             let to_write = &v[..thread_rng().gen_range(0..v.len())];
497             real.extend(to_write.iter().copied());
498             w.write_all(to_write).unwrap();
499         }
500         let result = w.finish().unwrap();
501         let mut r = read::GzDecoder::new(&result[..]);
502         let mut v = Vec::new();
503         r.read_to_end(&mut v).unwrap();
504         assert_eq!(v, real);
505     }
506 
507     #[test]
roundtrip_big2()508     fn roundtrip_big2() {
509         let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
510         let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
511         let mut res = Vec::new();
512         r.read_to_end(&mut res).unwrap();
513         assert_eq!(res, v);
514     }
515 
516     // A Rust implementation of CRC that closely matches the C code in RFC1952.
517     // Only use this to create CRCs for tests.
518     struct Rfc1952Crc {
519         /* Table of CRCs of all 8-bit messages. */
520         crc_table: [u32; 256],
521     }
522 
523     impl Rfc1952Crc {
new() -> Self524         fn new() -> Self {
525             let mut crc = Rfc1952Crc {
526                 crc_table: [0; 256],
527             };
528             /* Make the table for a fast CRC. */
529             for n in 0usize..256 {
530                 let mut c = n as u32;
531                 for _k in 0..8 {
532                     if c & 1 != 0 {
533                         c = 0xedb88320 ^ (c >> 1);
534                     } else {
535                         c = c >> 1;
536                     }
537                 }
538                 crc.crc_table[n] = c;
539             }
540             crc
541         }
542 
543         /*
544          Update a running crc with the bytes buf and return
545          the updated crc. The crc should be initialized to zero. Pre- and
546          post-conditioning (one's complement) is performed within this
547          function so it shouldn't be done by the caller.
548         */
update_crc(&self, crc: u32, buf: &[u8]) -> u32549         fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
550             let mut c = crc ^ 0xffffffff;
551 
552             for b in buf {
553                 c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
554             }
555             c ^ 0xffffffff
556         }
557 
558         /* Return the CRC of the bytes buf. */
crc(&self, buf: &[u8]) -> u32559         fn crc(&self, buf: &[u8]) -> u32 {
560             self.update_crc(0, buf)
561         }
562     }
563 
564     #[test]
roundtrip_header()565     fn roundtrip_header() {
566         let mut header = GzBuilder::new()
567             .mtime(1234)
568             .operating_system(57)
569             .filename("filename")
570             .comment("comment")
571             .into_header(Compression::fast());
572 
573         // Add a CRC to the header
574         header[3] = header[3] ^ super::FHCRC;
575         let rfc1952_crc = Rfc1952Crc::new();
576         let crc32 = rfc1952_crc.crc(&header);
577         let crc16 = crc32 as u16;
578         header.extend(&crc16.to_le_bytes());
579 
580         let mut parser = GzHeaderParser::new();
581         parser.parse(&mut header.as_slice()).unwrap();
582         let actual = parser.header().unwrap();
583         assert_eq!(
584             actual,
585             &GzHeader {
586                 extra: None,
587                 filename: Some("filename".as_bytes().to_vec()),
588                 comment: Some("comment".as_bytes().to_vec()),
589                 operating_system: 57,
590                 mtime: 1234
591             }
592         )
593     }
594 
595     #[test]
fields()596     fn fields() {
597         let r = vec![0, 2, 4, 6];
598         let e = GzBuilder::new()
599             .filename("foo.rs")
600             .comment("bar")
601             .extra(vec![0, 1, 2, 3])
602             .read(&r[..], Compression::default());
603         let mut d = read::GzDecoder::new(e);
604         assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
605         assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
606         assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
607         let mut res = Vec::new();
608         d.read_to_end(&mut res).unwrap();
609         assert_eq!(res, vec![0, 2, 4, 6]);
610     }
611 
612     #[test]
keep_reading_after_end()613     fn keep_reading_after_end() {
614         let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
615         e.write_all(b"foo bar baz").unwrap();
616         let inner = e.finish().unwrap();
617         let mut d = read::GzDecoder::new(&inner[..]);
618         let mut s = String::new();
619         d.read_to_string(&mut s).unwrap();
620         assert_eq!(s, "foo bar baz");
621         d.read_to_string(&mut s).unwrap();
622         assert_eq!(s, "foo bar baz");
623     }
624 
625     #[test]
qc_reader()626     fn qc_reader() {
627         ::quickcheck::quickcheck(test as fn(_) -> _);
628 
629         fn test(v: Vec<u8>) -> bool {
630             let r = read::GzEncoder::new(&v[..], Compression::default());
631             let mut r = read::GzDecoder::new(r);
632             let mut v2 = Vec::new();
633             r.read_to_end(&mut v2).unwrap();
634             v == v2
635         }
636     }
637 
638     #[test]
flush_after_write()639     fn flush_after_write() {
640         let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
641         write!(f, "Hello world").unwrap();
642         f.flush().unwrap();
643     }
644 }
645