1 use std::fs; 2 use std::io::{self, Read}; 3 use std::path::Path; 4 5 use super::{ 6 central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile, 7 ZipFileData, ZipResult, 8 }; 9 10 use byteorder::{LittleEndian, ReadBytesExt}; 11 12 /// Stream decoder for zip. 13 #[derive(Debug)] 14 pub struct ZipStreamReader<R>(R); 15 16 impl<R> ZipStreamReader<R> { 17 /// Create a new ZipStreamReader new(reader: R) -> Self18 pub fn new(reader: R) -> Self { 19 Self(reader) 20 } 21 } 22 23 impl<R: Read> ZipStreamReader<R> { parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>>24 fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> { 25 // Give archive_offset and central_header_start dummy value 0, since 26 // they are not used in the output. 27 let archive_offset = 0; 28 let central_header_start = 0; 29 30 // Parse central header 31 let signature = self.0.read_u32::<LittleEndian>()?; 32 if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { 33 Ok(None) 34 } else { 35 central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start) 36 .map(ZipStreamFileMetadata) 37 .map(Some) 38 } 39 } 40 41 /// Iteraate over the stream and extract all file and their 42 /// metadata. visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()>43 pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> { 44 while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { 45 visitor.visit_file(&mut file)?; 46 } 47 48 while let Some(metadata) = self.parse_central_directory()? { 49 visitor.visit_additional_metadata(&metadata)?; 50 } 51 52 Ok(()) 53 } 54 55 /// Extract a Zip archive into a directory, overwriting files if they 56 /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. 57 /// 58 /// Extraction is not atomic; If an error is encountered, some of the files 59 /// may be left on disk. extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()>60 pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> { 61 struct Extractor<'a>(&'a Path); 62 impl ZipStreamVisitor for Extractor<'_> { 63 fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { 64 let filepath = file 65 .enclosed_name() 66 .ok_or(ZipError::InvalidArchive("Invalid file path"))?; 67 68 let outpath = self.0.join(filepath); 69 70 if file.name().ends_with('/') { 71 fs::create_dir_all(&outpath)?; 72 } else { 73 if let Some(p) = outpath.parent() { 74 fs::create_dir_all(p)?; 75 } 76 let mut outfile = fs::File::create(&outpath)?; 77 io::copy(file, &mut outfile)?; 78 } 79 80 Ok(()) 81 } 82 83 #[allow(unused)] 84 fn visit_additional_metadata( 85 &mut self, 86 metadata: &ZipStreamFileMetadata, 87 ) -> ZipResult<()> { 88 #[cfg(unix)] 89 { 90 let filepath = metadata 91 .enclosed_name() 92 .ok_or(ZipError::InvalidArchive("Invalid file path"))?; 93 94 let outpath = self.0.join(filepath); 95 96 use std::os::unix::fs::PermissionsExt; 97 if let Some(mode) = metadata.unix_mode() { 98 fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?; 99 } 100 } 101 102 Ok(()) 103 } 104 } 105 106 self.visit(&mut Extractor(directory.as_ref())) 107 } 108 } 109 110 /// Visitor for ZipStreamReader 111 pub trait ZipStreamVisitor { 112 /// * `file` - contains the content of the file and most of the metadata, 113 /// except: 114 /// - `comment`: set to an empty string 115 /// - `data_start`: set to 0 116 /// - `external_attributes`: `unix_mode()`: will return None visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>117 fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; 118 119 /// This function is guranteed to be called after all `visit_file`s. 120 /// 121 /// * `metadata` - Provides missing metadata in `visit_file`. visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>122 fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; 123 } 124 125 /// Additional metadata for the file. 126 #[derive(Debug)] 127 pub struct ZipStreamFileMetadata(ZipFileData); 128 129 impl ZipStreamFileMetadata { 130 /// Get the name of the file 131 /// 132 /// # Warnings 133 /// 134 /// It is dangerous to use this name directly when extracting an archive. 135 /// It may contain an absolute path (`/etc/shadow`), or break out of the 136 /// current directory (`../runtime`). Carelessly writing to these paths 137 /// allows an attacker to craft a ZIP archive that will overwrite critical 138 /// files. 139 /// 140 /// You can use the [`ZipFile::enclosed_name`] method to validate the name 141 /// as a safe path. name(&self) -> &str142 pub fn name(&self) -> &str { 143 &self.0.file_name 144 } 145 146 /// Get the name of the file, in the raw (internal) byte representation. 147 /// 148 /// The encoding of this data is currently undefined. name_raw(&self) -> &[u8]149 pub fn name_raw(&self) -> &[u8] { 150 &self.0.file_name_raw 151 } 152 153 /// Rewrite the path, ignoring any path components with special meaning. 154 /// 155 /// - Absolute paths are made relative 156 /// - [`ParentDir`]s are ignored 157 /// - Truncates the filename at a NULL byte 158 /// 159 /// This is appropriate if you need to be able to extract *something* from 160 /// any archive, but will easily misrepresent trivial paths like 161 /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, 162 /// [`ZipFile::enclosed_name`] is the better option in most scenarios. 163 /// 164 /// [`ParentDir`]: `Component::ParentDir` mangled_name(&self) -> ::std::path::PathBuf165 pub fn mangled_name(&self) -> ::std::path::PathBuf { 166 self.0.file_name_sanitized() 167 } 168 169 /// Ensure the file path is safe to use as a [`Path`]. 170 /// 171 /// - It can't contain NULL bytes 172 /// - It can't resolve to a path outside the current directory 173 /// > `foo/../bar` is fine, `foo/../../bar` is not. 174 /// - It can't be an absolute path 175 /// 176 /// This will read well-formed ZIP files correctly, and is resistant 177 /// to path-based exploits. It is recommended over 178 /// [`ZipFile::mangled_name`]. enclosed_name(&self) -> Option<&Path>179 pub fn enclosed_name(&self) -> Option<&Path> { 180 self.0.enclosed_name() 181 } 182 183 /// Returns whether the file is actually a directory is_dir(&self) -> bool184 pub fn is_dir(&self) -> bool { 185 self.name() 186 .chars() 187 .rev() 188 .next() 189 .map_or(false, |c| c == '/' || c == '\\') 190 } 191 192 /// Returns whether the file is a regular file is_file(&self) -> bool193 pub fn is_file(&self) -> bool { 194 !self.is_dir() 195 } 196 197 /// Get the comment of the file comment(&self) -> &str198 pub fn comment(&self) -> &str { 199 &self.0.file_comment 200 } 201 202 /// Get the starting offset of the data of the compressed file data_start(&self) -> u64203 pub fn data_start(&self) -> u64 { 204 self.0.data_start.load() 205 } 206 207 /// Get unix mode for the file unix_mode(&self) -> Option<u32>208 pub fn unix_mode(&self) -> Option<u32> { 209 self.0.unix_mode() 210 } 211 } 212 213 #[cfg(test)] 214 mod test { 215 use super::*; 216 use std::collections::BTreeSet; 217 use std::io; 218 219 struct DummyVisitor; 220 impl ZipStreamVisitor for DummyVisitor { visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()>221 fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { 222 Ok(()) 223 } 224 visit_additional_metadata( &mut self, _metadata: &ZipStreamFileMetadata, ) -> ZipResult<()>225 fn visit_additional_metadata( 226 &mut self, 227 _metadata: &ZipStreamFileMetadata, 228 ) -> ZipResult<()> { 229 Ok(()) 230 } 231 } 232 233 #[derive(Default, Debug, Eq, PartialEq)] 234 struct CounterVisitor(u64, u64); 235 impl ZipStreamVisitor for CounterVisitor { visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()>236 fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { 237 self.0 += 1; 238 Ok(()) 239 } 240 visit_additional_metadata( &mut self, _metadata: &ZipStreamFileMetadata, ) -> ZipResult<()>241 fn visit_additional_metadata( 242 &mut self, 243 _metadata: &ZipStreamFileMetadata, 244 ) -> ZipResult<()> { 245 self.1 += 1; 246 Ok(()) 247 } 248 } 249 250 #[test] invalid_offset()251 fn invalid_offset() { 252 ZipStreamReader::new(io::Cursor::new(include_bytes!( 253 "../../tests/data/invalid_offset.zip" 254 ))) 255 .visit(&mut DummyVisitor) 256 .unwrap_err(); 257 } 258 259 #[test] invalid_offset2()260 fn invalid_offset2() { 261 ZipStreamReader::new(io::Cursor::new(include_bytes!( 262 "../../tests/data/invalid_offset2.zip" 263 ))) 264 .visit(&mut DummyVisitor) 265 .unwrap_err(); 266 } 267 268 #[test] zip_read_streaming()269 fn zip_read_streaming() { 270 let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!( 271 "../../tests/data/mimetype.zip" 272 ))); 273 274 #[derive(Default)] 275 struct V { 276 filenames: BTreeSet<Box<str>>, 277 } 278 impl ZipStreamVisitor for V { 279 fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { 280 if file.is_file() { 281 self.filenames.insert(file.name().into()); 282 } 283 284 Ok(()) 285 } 286 fn visit_additional_metadata( 287 &mut self, 288 metadata: &ZipStreamFileMetadata, 289 ) -> ZipResult<()> { 290 if metadata.is_file() { 291 assert!( 292 self.filenames.contains(metadata.name()), 293 "{} is missing its file content", 294 metadata.name() 295 ); 296 } 297 298 Ok(()) 299 } 300 } 301 302 reader.visit(&mut V::default()).unwrap(); 303 } 304 305 #[test] file_and_dir_predicates()306 fn file_and_dir_predicates() { 307 let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!( 308 "../../tests/data/files_and_dirs.zip" 309 ))); 310 311 #[derive(Default)] 312 struct V { 313 filenames: BTreeSet<Box<str>>, 314 } 315 impl ZipStreamVisitor for V { 316 fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { 317 let full_name = file.enclosed_name().unwrap(); 318 let file_name = full_name.file_name().unwrap().to_str().unwrap(); 319 assert!( 320 (file_name.starts_with("dir") && file.is_dir()) 321 || (file_name.starts_with("file") && file.is_file()) 322 ); 323 324 if file.is_file() { 325 self.filenames.insert(file.name().into()); 326 } 327 328 Ok(()) 329 } 330 fn visit_additional_metadata( 331 &mut self, 332 metadata: &ZipStreamFileMetadata, 333 ) -> ZipResult<()> { 334 if metadata.is_file() { 335 assert!( 336 self.filenames.contains(metadata.name()), 337 "{} is missing its file content", 338 metadata.name() 339 ); 340 } 341 342 Ok(()) 343 } 344 } 345 346 reader.visit(&mut V::default()).unwrap(); 347 } 348 349 /// test case to ensure we don't preemptively over allocate based on the 350 /// declared number of files in the CDE of an invalid zip when the number of 351 /// files declared is more than the alleged offset in the CDE 352 #[test] invalid_cde_number_of_files_allocation_smaller_offset()353 fn invalid_cde_number_of_files_allocation_smaller_offset() { 354 ZipStreamReader::new(io::Cursor::new(include_bytes!( 355 "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip" 356 ))) 357 .visit(&mut DummyVisitor) 358 .unwrap_err(); 359 } 360 361 /// test case to ensure we don't preemptively over allocate based on the 362 /// declared number of files in the CDE of an invalid zip when the number of 363 /// files declared is less than the alleged offset in the CDE 364 #[test] invalid_cde_number_of_files_allocation_greater_offset()365 fn invalid_cde_number_of_files_allocation_greater_offset() { 366 ZipStreamReader::new(io::Cursor::new(include_bytes!( 367 "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip" 368 ))) 369 .visit(&mut DummyVisitor) 370 .unwrap_err(); 371 } 372 } 373