1 use std::fs;
2 use std::io::{self, Read};
3 use std::path::Path;
4 
5 use super::{
6     central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
7     ZipFileData, ZipResult,
8 };
9 
10 use byteorder::{LittleEndian, ReadBytesExt};
11 
/// Stream decoder for zip.
///
/// Wraps a reader `R` and decodes the archive from it using only
/// [`Read`]; no seeking is required. The reader is consumed by
/// [`ZipStreamReader::visit`] or [`ZipStreamReader::extract`].
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);
15 
16 impl<R> ZipStreamReader<R> {
17     /// Create a new ZipStreamReader
new(reader: R) -> Self18     pub fn new(reader: R) -> Self {
19         Self(reader)
20     }
21 }
22 
23 impl<R: Read> ZipStreamReader<R> {
parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>>24     fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
25         // Give archive_offset and central_header_start dummy value 0, since
26         // they are not used in the output.
27         let archive_offset = 0;
28         let central_header_start = 0;
29 
30         // Parse central header
31         let signature = self.0.read_u32::<LittleEndian>()?;
32         if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
33             Ok(None)
34         } else {
35             central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
36                 .map(ZipStreamFileMetadata)
37                 .map(Some)
38         }
39     }
40 
41     /// Iteraate over the stream and extract all file and their
42     /// metadata.
visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()>43     pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
44         while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
45             visitor.visit_file(&mut file)?;
46         }
47 
48         while let Some(metadata) = self.parse_central_directory()? {
49             visitor.visit_additional_metadata(&metadata)?;
50         }
51 
52         Ok(())
53     }
54 
55     /// Extract a Zip archive into a directory, overwriting files if they
56     /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
57     ///
58     /// Extraction is not atomic; If an error is encountered, some of the files
59     /// may be left on disk.
extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()>60     pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
61         struct Extractor<'a>(&'a Path);
62         impl ZipStreamVisitor for Extractor<'_> {
63             fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
64                 let filepath = file
65                     .enclosed_name()
66                     .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
67 
68                 let outpath = self.0.join(filepath);
69 
70                 if file.name().ends_with('/') {
71                     fs::create_dir_all(&outpath)?;
72                 } else {
73                     if let Some(p) = outpath.parent() {
74                         fs::create_dir_all(p)?;
75                     }
76                     let mut outfile = fs::File::create(&outpath)?;
77                     io::copy(file, &mut outfile)?;
78                 }
79 
80                 Ok(())
81             }
82 
83             #[allow(unused)]
84             fn visit_additional_metadata(
85                 &mut self,
86                 metadata: &ZipStreamFileMetadata,
87             ) -> ZipResult<()> {
88                 #[cfg(unix)]
89                 {
90                     let filepath = metadata
91                         .enclosed_name()
92                         .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
93 
94                     let outpath = self.0.join(filepath);
95 
96                     use std::os::unix::fs::PermissionsExt;
97                     if let Some(mode) = metadata.unix_mode() {
98                         fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
99                     }
100                 }
101 
102                 Ok(())
103             }
104         }
105 
106         self.visit(&mut Extractor(directory.as_ref()))
107     }
108 }
109 
/// Visitor for ZipStreamReader
///
/// [`ZipStreamReader::visit`] drives an implementation of this trait: it
/// calls `visit_file` for each file entry, then `visit_additional_metadata`
/// for each central directory header. Returning an error from either method
/// aborts the visit and is returned to the caller.
pub trait ZipStreamVisitor {
    /// Called once for every file entry read from the stream.
    ///
    ///  * `file` - contains the content of the file and most of the metadata,
    ///    except:
    ///     - `comment`: set to an empty string
    ///     - `data_start`: set to 0
    ///     - `external_attributes`: `unix_mode()`: will return None
    fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;

    /// This function is guaranteed to be called after all `visit_file`s.
    ///
    ///  * `metadata` - Provides missing metadata in `visit_file`.
    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
}
124 
/// Additional metadata for the file.
///
/// Built from a central directory header and handed to
/// [`ZipStreamVisitor::visit_additional_metadata`].
#[derive(Debug)]
pub struct ZipStreamFileMetadata(ZipFileData);
128 
129 impl ZipStreamFileMetadata {
130     /// Get the name of the file
131     ///
132     /// # Warnings
133     ///
134     /// It is dangerous to use this name directly when extracting an archive.
135     /// It may contain an absolute path (`/etc/shadow`), or break out of the
136     /// current directory (`../runtime`). Carelessly writing to these paths
137     /// allows an attacker to craft a ZIP archive that will overwrite critical
138     /// files.
139     ///
140     /// You can use the [`ZipFile::enclosed_name`] method to validate the name
141     /// as a safe path.
name(&self) -> &str142     pub fn name(&self) -> &str {
143         &self.0.file_name
144     }
145 
146     /// Get the name of the file, in the raw (internal) byte representation.
147     ///
148     /// The encoding of this data is currently undefined.
name_raw(&self) -> &[u8]149     pub fn name_raw(&self) -> &[u8] {
150         &self.0.file_name_raw
151     }
152 
153     /// Rewrite the path, ignoring any path components with special meaning.
154     ///
155     /// - Absolute paths are made relative
156     /// - [`ParentDir`]s are ignored
157     /// - Truncates the filename at a NULL byte
158     ///
159     /// This is appropriate if you need to be able to extract *something* from
160     /// any archive, but will easily misrepresent trivial paths like
161     /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
162     /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
163     ///
164     /// [`ParentDir`]: `Component::ParentDir`
mangled_name(&self) -> ::std::path::PathBuf165     pub fn mangled_name(&self) -> ::std::path::PathBuf {
166         self.0.file_name_sanitized()
167     }
168 
169     /// Ensure the file path is safe to use as a [`Path`].
170     ///
171     /// - It can't contain NULL bytes
172     /// - It can't resolve to a path outside the current directory
173     ///   > `foo/../bar` is fine, `foo/../../bar` is not.
174     /// - It can't be an absolute path
175     ///
176     /// This will read well-formed ZIP files correctly, and is resistant
177     /// to path-based exploits. It is recommended over
178     /// [`ZipFile::mangled_name`].
enclosed_name(&self) -> Option<&Path>179     pub fn enclosed_name(&self) -> Option<&Path> {
180         self.0.enclosed_name()
181     }
182 
183     /// Returns whether the file is actually a directory
is_dir(&self) -> bool184     pub fn is_dir(&self) -> bool {
185         self.name()
186             .chars()
187             .rev()
188             .next()
189             .map_or(false, |c| c == '/' || c == '\\')
190     }
191 
192     /// Returns whether the file is a regular file
is_file(&self) -> bool193     pub fn is_file(&self) -> bool {
194         !self.is_dir()
195     }
196 
197     /// Get the comment of the file
comment(&self) -> &str198     pub fn comment(&self) -> &str {
199         &self.0.file_comment
200     }
201 
202     /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64203     pub fn data_start(&self) -> u64 {
204         self.0.data_start.load()
205     }
206 
207     /// Get unix mode for the file
unix_mode(&self) -> Option<u32>208     pub fn unix_mode(&self) -> Option<u32> {
209         self.0.unix_mode()
210     }
211 }
212 
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Visitor that accepts every callback and does nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(&mut self, _: &ZipStreamFileMetadata) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts `visit_file` calls in field 0 and
    /// `visit_additional_metadata` calls in field 1.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _: &mut ZipFile<'_>) -> ZipResult<()> {
            let CounterVisitor(files, _) = self;
            *files += 1;
            Ok(())
        }

        fn visit_additional_metadata(&mut self, _: &ZipStreamFileMetadata) -> ZipResult<()> {
            let CounterVisitor(_, entries) = self;
            *entries += 1;
            Ok(())
        }
    }

    /// Visiting an archive with an invalid offset must fail.
    #[test]
    fn invalid_offset() {
        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_offset.zip"
        ));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    /// Second invalid-offset fixture; visiting must fail as well.
    #[test]
    fn invalid_offset2() {
        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_offset2.zip"
        ));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    /// Every regular file announced by the central directory must have been
    /// delivered through `visit_file` beforehand.
    #[test]
    fn zip_read_streaming() {
        #[derive(Default)]
        struct NameCollector {
            seen: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for NameCollector {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if !file.is_file() {
                    return Ok(());
                }
                self.seen.insert(file.name().into());
                Ok(())
            }

            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if !metadata.is_file() {
                    return Ok(());
                }
                assert!(
                    self.seen.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );
                Ok(())
            }
        }

        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/mimetype.zip"
        ));
        let mut collector = NameCollector::default();
        ZipStreamReader::new(archive).visit(&mut collector).unwrap();
    }

    /// `is_dir` / `is_file` must agree with the `dir*` / `file*` naming used
    /// by the entries of the fixture archive.
    #[test]
    fn file_and_dir_predicates() {
        #[derive(Default)]
        struct NameCollector {
            seen: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for NameCollector {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let enclosed = file.enclosed_name().unwrap();
                let leaf = enclosed.file_name().unwrap().to_str().unwrap();
                let named_dir_is_dir = leaf.starts_with("dir") && file.is_dir();
                let named_file_is_file = leaf.starts_with("file") && file.is_file();
                assert!(named_dir_is_dir || named_file_is_file);

                if file.is_file() {
                    self.seen.insert(file.name().into());
                }

                Ok(())
            }

            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if !metadata.is_file() {
                    return Ok(());
                }
                assert!(
                    self.seen.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );
                Ok(())
            }
        }

        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/files_and_dirs.zip"
        ));
        let mut collector = NameCollector::default();
        ZipStreamReader::new(archive).visit(&mut collector).unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        ));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        ));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }
}
373