1 // Copyright 2011 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 6 7 #include <stddef.h> 8 #include <stdint.h> 9 10 #include <limits> 11 #include <memory> 12 #include <string> 13 14 #include "base/files/file.h" 15 #include "base/files/file_path.h" 16 #include "base/functional/callback.h" 17 #include "base/memory/weak_ptr.h" 18 #include "base/numerics/safe_conversions.h" 19 #include "base/time/time.h" 20 21 #if defined(USE_SYSTEM_MINIZIP) 22 #include <minizip/unzip.h> 23 #else 24 #include "third_party/zlib/contrib/minizip/unzip.h" 25 #endif 26 27 namespace zip { 28 29 // A delegate interface used to stream out an entry; see 30 // ZipReader::ExtractCurrentEntry. 31 class WriterDelegate { 32 public: ~WriterDelegate()33 virtual ~WriterDelegate() {} 34 35 // Invoked once before any data is streamed out to pave the way (e.g., to open 36 // the output file). Return false on failure to cancel extraction. PrepareOutput()37 virtual bool PrepareOutput() { return true; } 38 39 // Invoked to write the next chunk of data. Return false on failure to cancel 40 // extraction. WriteBytes(const char * data,int num_bytes)41 virtual bool WriteBytes(const char* data, int num_bytes) { return true; } 42 43 // Sets the last-modified time of the data. SetTimeModified(const base::Time & time)44 virtual void SetTimeModified(const base::Time& time) {} 45 46 // Called with the POSIX file permissions of the data; POSIX implementations 47 // may apply some of the permissions (for example, the executable bit) to the 48 // output file. SetPosixFilePermissions(int mode)49 virtual void SetPosixFilePermissions(int mode) {} 50 51 // Called if an error occurred while extracting the file. The WriterDelegate 52 // can then remove and clean up the partially extracted data. OnError()53 virtual void OnError() {} 54 }; 55 56 // This class is used for reading ZIP archives. A typical use case of this class 57 // is to scan entries in a ZIP archive and extract them. The code will look 58 // like: 59 // 60 // ZipReader reader; 61 // if (!reader.Open(zip_path)) { 62 // // Cannot open 63 // return; 64 // } 65 // 66 // while (const ZipReader::entry* entry = reader.Next()) { 67 // auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path); 68 // if (!reader.ExtractCurrentEntry(writer)) { 69 // // Cannot extract 70 // return; 71 // } 72 // } 73 // 74 // if (!reader.ok()) { 75 // // Error while enumerating entries 76 // return; 77 // } 78 // 79 class ZipReader { 80 public: 81 // A callback that is called when the operation is successful. 82 using SuccessCallback = base::OnceClosure; 83 // A callback that is called when the operation fails. 84 using FailureCallback = base::OnceClosure; 85 // A callback that is called periodically during the operation with the number 86 // of bytes that have been processed so far. 87 using ProgressCallback = base::RepeatingCallback<void(int64_t)>; 88 // A callback that is called periodically during the operation with the number 89 // of bytes that have been processed since the previous call (i.e. delta). 90 using ListenerCallback = base::RepeatingCallback<void(uint64_t)>; 91 92 // Information of an entry (file or directory) in a ZIP archive. 93 struct Entry { 94 // Path of this entry, in its original encoding as it is stored in the ZIP 95 // archive. The encoding is not specified here. It might or might not be 96 // UTF-8, and the caller needs to use other means to determine the encoding 97 // if it wants to interpret this path correctly. 98 std::string path_in_original_encoding; 99 100 // Path of the entry, converted to Unicode. This path is relative (eg 101 // "foo/bar.txt"). Absolute paths (eg "/foo/bar.txt") or paths containing 102 // ".." or "." components (eg "../foo/bar.txt") are converted to safe 103 // relative paths. Eg: 104 // (In ZIP) -> (Entry.path) 105 // /foo/bar -> ROOT/foo/bar 106 // ../a -> UP/a 107 // ./a -> DOT/a 108 base::FilePath path; 109 110 // Size of the original uncompressed file, or 0 if the entry is a directory. 111 // This value should not be trusted, because it is stored as metadata in the 112 // ZIP archive and can be different from the real uncompressed size. 113 int64_t original_size; 114 115 // Last modified time. If the timestamp stored in the ZIP archive is not 116 // valid, the Unix epoch will be returned. 117 // 118 // The timestamp stored in the ZIP archive uses the MS-DOS date and time 119 // format. 120 // 121 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx 122 // 123 // As such the following limitations apply: 124 // * Only years from 1980 to 2107 can be represented. 125 // * The timestamp has a 2-second resolution. 126 // * There is no timezone information, so the time is interpreted as UTC. 127 base::Time last_modified; 128 129 // True if the entry is a directory. 130 // False if the entry is a file. 131 bool is_directory = false; 132 133 // True if the entry path cannot be converted to a safe relative path. This 134 // happens if a file entry (not a directory) has a filename "." or "..". 135 bool is_unsafe = false; 136 137 // True if the file content is encrypted. 138 bool is_encrypted = false; 139 140 // True if the encryption scheme is AES. 141 bool uses_aes_encryption = false; 142 143 // Entry POSIX permissions (POSIX systems only). 144 int posix_mode; 145 }; 146 147 ZipReader(); 148 149 ZipReader(const ZipReader&) = delete; 150 ZipReader& operator=(const ZipReader&) = delete; 151 152 ~ZipReader(); 153 154 // Opens the ZIP archive specified by |zip_path|. Returns true on 155 // success. 156 bool Open(const base::FilePath& zip_path); 157 158 // Opens the ZIP archive referred to by the platform file |zip_fd|, without 159 // taking ownership of |zip_fd|. Returns true on success. 160 bool OpenFromPlatformFile(base::PlatformFile zip_fd); 161 162 // Opens the zip data stored in |data|. This class uses a weak reference to 163 // the given sring while extracting files, i.e. the caller should keep the 164 // string until it finishes extracting files. 165 bool OpenFromString(const std::string& data); 166 167 // Closes the currently opened ZIP archive. This function is called in the 168 // destructor of the class, so you usually don't need to call this. 169 void Close(); 170 171 // Sets the encoding of entry paths in the ZIP archive. 172 // By default, paths are assumed to be in UTF-8. SetEncoding(std::string encoding)173 void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); } 174 175 // Sets the decryption password that will be used to decrypt encrypted file in 176 // the ZIP archive. SetPassword(std::string password)177 void SetPassword(std::string password) { password_ = std::move(password); } 178 179 // Gets the next entry. Returns null if there is no more entry, or if an error 180 // occurred while scanning entries. The returned Entry is owned by this 181 // ZipReader, and is valid until Next() is called again or until this 182 // ZipReader is closed. 183 // 184 // This function should be called before operations over the current entry 185 // like ExtractCurrentEntryToFile(). 186 // 187 // while (const ZipReader::Entry* entry = reader.Next()) { 188 // // Do something with the current entry here. 189 // ... 190 // } 191 // 192 // // Finished scanning entries. 193 // // Check if the scanning stopped because of an error. 194 // if (!reader.ok()) { 195 // // There was an error. 196 // ... 197 // } 198 const Entry* Next(); 199 200 // Returns true if the enumeration of entries was successful, or false if it 201 // stopped because of an error. ok()202 bool ok() const { return ok_; } 203 204 // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, 205 // starting from the beginning of the entry. 206 // 207 // Returns true if the entire file was extracted without error. 208 // 209 // Precondition: Next() returned a non-null Entry. 210 bool ExtractCurrentEntry(WriterDelegate* delegate, 211 uint64_t num_bytes_to_extract = 212 std::numeric_limits<uint64_t>::max()) const; 213 214 // Extracts the current entry to |delegate|, starting from the beginning 215 // of the entry, calling |listener_callback| regularly with the number of 216 // bytes extracted. 217 // 218 // Returns true if the entire file was extracted without error. 219 // 220 // Precondition: Next() returned a non-null Entry. 221 bool ExtractCurrentEntryWithListener( 222 WriterDelegate* delegate, 223 ListenerCallback listener_callback) const; 224 225 // Asynchronously extracts the current entry to the given output file path. If 226 // the current entry is a directory it just creates the directory 227 // synchronously instead. 228 // 229 // |success_callback| will be called on success and |failure_callback| will be 230 // called on failure. |progress_callback| will be called at least once. 231 // Callbacks will be posted to the current MessageLoop in-order. 232 // 233 // Precondition: Next() returned a non-null Entry. 234 void ExtractCurrentEntryToFilePathAsync( 235 const base::FilePath& output_file_path, 236 SuccessCallback success_callback, 237 FailureCallback failure_callback, 238 ProgressCallback progress_callback); 239 240 // Extracts the current entry into memory. If the current entry is a 241 // directory, |*output| is set to the empty string. If the current entry is a 242 // file, |*output| is filled with its contents. 243 // 244 // The value in |Entry::original_size| cannot be trusted, so the real size of 245 // the uncompressed contents can be different. |max_read_bytes| limits the 246 // amount of memory used to carry the entry. 247 // 248 // Returns true if the entire content is read without error. If the content is 249 // bigger than |max_read_bytes|, this function returns false and |*output| is 250 // filled with |max_read_bytes| of data. If an error occurs, this function 251 // returns false and |*output| contains the content extracted so far, which 252 // might be garbage data. 253 // 254 // Precondition: Next() returned a non-null Entry. 255 bool ExtractCurrentEntryToString(uint64_t max_read_bytes, 256 std::string* output) const; 257 ExtractCurrentEntryToString(std::string * output)258 bool ExtractCurrentEntryToString(std::string* output) const { 259 return ExtractCurrentEntryToString( 260 base::checked_cast<uint64_t>(output->max_size()), output); 261 } 262 263 // Returns the number of entries in the ZIP archive. 264 // 265 // Precondition: one of the Open() methods returned true. num_entries()266 int num_entries() const { return num_entries_; } 267 268 private: 269 // Common code used both in Open and OpenFromFd. 270 bool OpenInternal(); 271 272 // Resets the internal state. 273 void Reset(); 274 275 // Opens the current entry in the ZIP archive. On success, returns true and 276 // updates the current entry state |entry_|. 277 // 278 // Note that there is no matching CloseEntry(). The current entry state is 279 // reset automatically as needed. 280 bool OpenEntry(); 281 282 // Normalizes the given path passed as UTF-16 string piece. Sets entry_.path, 283 // entry_.is_directory and entry_.is_unsafe. 284 void Normalize(base::StringPiece16 in); 285 286 // Runs the ListenerCallback at a throttled rate. 287 void ReportProgress(ListenerCallback listener_callback, uint64_t bytes) const; 288 289 // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, 290 // starting from the beginning of the entry calling |listener_callback| if 291 // its supplied. 292 // 293 // Returns true if the entire file was extracted without error. 294 // 295 // Precondition: Next() returned a non-null Entry. 296 bool ExtractCurrentEntry(WriterDelegate* delegate, 297 ListenerCallback listener_callback, 298 uint64_t num_bytes_to_extract = 299 std::numeric_limits<uint64_t>::max()) const; 300 301 // Extracts a chunk of the file to the target. Will post a task for the next 302 // chunk and success/failure/progress callbacks as necessary. 303 void ExtractChunk(base::File target_file, 304 SuccessCallback success_callback, 305 FailureCallback failure_callback, 306 ProgressCallback progress_callback, 307 const int64_t offset); 308 309 std::string encoding_; 310 std::string password_; 311 unzFile zip_file_; 312 int num_entries_; 313 int next_index_; 314 bool reached_end_; 315 bool ok_; 316 Entry entry_; 317 318 // Next time to report progress. 319 mutable base::TimeTicks next_progress_report_time_ = base::TimeTicks::Now(); 320 321 // Progress time delta. 322 // TODO(crbug.com/953256) Add this as parameter to the unzip options. 323 base::TimeDelta progress_period_ = base::Milliseconds(1000); 324 325 // Number of bytes read since last progress report callback executed. 326 mutable uint64_t delta_bytes_read_ = 0; 327 328 base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; 329 }; 330 331 // A writer delegate that writes to a given File. It is recommended that this 332 // file be initially empty. 333 class FileWriterDelegate : public WriterDelegate { 334 public: 335 // Constructs a FileWriterDelegate that manipulates |file|. The delegate will 336 // not own |file|, therefore the caller must guarantee |file| will outlive the 337 // delegate. 338 explicit FileWriterDelegate(base::File* file); 339 340 // Constructs a FileWriterDelegate that takes ownership of |file|. 341 explicit FileWriterDelegate(base::File owned_file); 342 343 FileWriterDelegate(const FileWriterDelegate&) = delete; 344 FileWriterDelegate& operator=(const FileWriterDelegate&) = delete; 345 346 ~FileWriterDelegate() override; 347 348 // Returns true if the file handle passed to the constructor is valid. 349 bool PrepareOutput() override; 350 351 // Writes |num_bytes| bytes of |data| to the file, returning false on error or 352 // if not all bytes could be written. 353 bool WriteBytes(const char* data, int num_bytes) override; 354 355 // Sets the last-modified time of the data. 356 void SetTimeModified(const base::Time& time) override; 357 358 // On POSIX systems, sets the file to be executable if the source file was 359 // executable. 360 void SetPosixFilePermissions(int mode) override; 361 362 // Empties the file to avoid leaving garbage data in it. 363 void OnError() override; 364 365 // Gets the number of bytes written into the file. file_length()366 int64_t file_length() { return file_length_; } 367 368 protected: 369 // The delegate can optionally own the file it modifies, in which case 370 // owned_file_ is set and file_ is an alias for owned_file_. 371 base::File owned_file_; 372 373 // The file the delegate modifies. 374 base::File* const file_ = &owned_file_; 375 376 int64_t file_length_ = 0; 377 }; 378 379 // A writer delegate that creates and writes a file at a given path. This does 380 // not overwrite any existing file. 381 class FilePathWriterDelegate : public FileWriterDelegate { 382 public: 383 explicit FilePathWriterDelegate(base::FilePath output_file_path); 384 385 FilePathWriterDelegate(const FilePathWriterDelegate&) = delete; 386 FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete; 387 388 ~FilePathWriterDelegate() override; 389 390 // Creates the output file and any necessary intermediate directories. Does 391 // not overwrite any existing file, and returns false if the output file 392 // cannot be created because another file conflicts with it. 393 bool PrepareOutput() override; 394 395 // Deletes the output file. 396 void OnError() override; 397 398 private: 399 const base::FilePath output_file_path_; 400 }; 401 402 } // namespace zip 403 404 #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 405