xref: /aosp_15_r20/external/zlib/google/zip_reader.h (revision 86ee64e75fa5f8bce2c8c356138035642429cd05)
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <limits>
11 #include <memory>
12 #include <string>
13 
14 #include "base/files/file.h"
15 #include "base/files/file_path.h"
16 #include "base/functional/callback.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/numerics/safe_conversions.h"
19 #include "base/time/time.h"
20 
21 #if defined(USE_SYSTEM_MINIZIP)
22 #include <minizip/unzip.h>
23 #else
24 #include "third_party/zlib/contrib/minizip/unzip.h"
25 #endif
26 
27 namespace zip {
28 
29 // A delegate interface used to stream out an entry; see
30 // ZipReader::ExtractCurrentEntry.
31 class WriterDelegate {
32  public:
~WriterDelegate()33   virtual ~WriterDelegate() {}
34 
35   // Invoked once before any data is streamed out to pave the way (e.g., to open
36   // the output file). Return false on failure to cancel extraction.
PrepareOutput()37   virtual bool PrepareOutput() { return true; }
38 
39   // Invoked to write the next chunk of data. Return false on failure to cancel
40   // extraction.
WriteBytes(const char * data,int num_bytes)41   virtual bool WriteBytes(const char* data, int num_bytes) { return true; }
42 
43   // Sets the last-modified time of the data.
SetTimeModified(const base::Time & time)44   virtual void SetTimeModified(const base::Time& time) {}
45 
46   // Called with the POSIX file permissions of the data; POSIX implementations
47   // may apply some of the permissions (for example, the executable bit) to the
48   // output file.
SetPosixFilePermissions(int mode)49   virtual void SetPosixFilePermissions(int mode) {}
50 
51   // Called if an error occurred while extracting the file. The WriterDelegate
52   // can then remove and clean up the partially extracted data.
OnError()53   virtual void OnError() {}
54 };
55 
56 // This class is used for reading ZIP archives. A typical use case of this class
57 // is to scan entries in a ZIP archive and extract them. The code will look
58 // like:
59 //
60 //   ZipReader reader;
61 //   if (!reader.Open(zip_path)) {
62 //     // Cannot open
63 //     return;
64 //   }
65 //
66 //   while (const ZipReader::entry* entry = reader.Next()) {
67 //     auto writer = CreateFilePathWriterDelegate(extract_dir, entry->path);
68 //     if (!reader.ExtractCurrentEntry(writer)) {
69 //           // Cannot extract
70 //           return;
71 //     }
72 //   }
73 //
74 //   if (!reader.ok()) {
75 //     // Error while enumerating entries
76 //     return;
77 //   }
78 //
79 class ZipReader {
80  public:
81   // A callback that is called when the operation is successful.
82   using SuccessCallback = base::OnceClosure;
83   // A callback that is called when the operation fails.
84   using FailureCallback = base::OnceClosure;
85   // A callback that is called periodically during the operation with the number
86   // of bytes that have been processed so far.
87   using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
88   // A callback that is called periodically during the operation with the number
89   // of bytes that have been processed since the previous call (i.e. delta).
90   using ListenerCallback = base::RepeatingCallback<void(uint64_t)>;
91 
92   // Information of an entry (file or directory) in a ZIP archive.
93   struct Entry {
94     // Path of this entry, in its original encoding as it is stored in the ZIP
95     // archive. The encoding is not specified here. It might or might not be
96     // UTF-8, and the caller needs to use other means to determine the encoding
97     // if it wants to interpret this path correctly.
98     std::string path_in_original_encoding;
99 
100     // Path of the entry, converted to Unicode. This path is relative (eg
101     // "foo/bar.txt"). Absolute paths (eg "/foo/bar.txt") or paths containing
102     // ".." or "." components (eg "../foo/bar.txt") are converted to safe
103     // relative paths. Eg:
104     // (In ZIP) -> (Entry.path)
105     // /foo/bar -> ROOT/foo/bar
106     // ../a     -> UP/a
107     // ./a      -> DOT/a
108     base::FilePath path;
109 
110     // Size of the original uncompressed file, or 0 if the entry is a directory.
111     // This value should not be trusted, because it is stored as metadata in the
112     // ZIP archive and can be different from the real uncompressed size.
113     int64_t original_size;
114 
115     // Last modified time. If the timestamp stored in the ZIP archive is not
116     // valid, the Unix epoch will be returned.
117     //
118     // The timestamp stored in the ZIP archive uses the MS-DOS date and time
119     // format.
120     //
121     // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
122     //
123     // As such the following limitations apply:
124     // * Only years from 1980 to 2107 can be represented.
125     // * The timestamp has a 2-second resolution.
126     // * There is no timezone information, so the time is interpreted as UTC.
127     base::Time last_modified;
128 
129     // True if the entry is a directory.
130     // False if the entry is a file.
131     bool is_directory = false;
132 
133     // True if the entry path cannot be converted to a safe relative path. This
134     // happens if a file entry (not a directory) has a filename "." or "..".
135     bool is_unsafe = false;
136 
137     // True if the file content is encrypted.
138     bool is_encrypted = false;
139 
140     // True if the encryption scheme is AES.
141     bool uses_aes_encryption = false;
142 
143     // Entry POSIX permissions (POSIX systems only).
144     int posix_mode;
145   };
146 
147   ZipReader();
148 
149   ZipReader(const ZipReader&) = delete;
150   ZipReader& operator=(const ZipReader&) = delete;
151 
152   ~ZipReader();
153 
154   // Opens the ZIP archive specified by |zip_path|. Returns true on
155   // success.
156   bool Open(const base::FilePath& zip_path);
157 
158   // Opens the ZIP archive referred to by the platform file |zip_fd|, without
159   // taking ownership of |zip_fd|. Returns true on success.
160   bool OpenFromPlatformFile(base::PlatformFile zip_fd);
161 
162   // Opens the zip data stored in |data|. This class uses a weak reference to
163   // the given sring while extracting files, i.e. the caller should keep the
164   // string until it finishes extracting files.
165   bool OpenFromString(const std::string& data);
166 
167   // Closes the currently opened ZIP archive. This function is called in the
168   // destructor of the class, so you usually don't need to call this.
169   void Close();
170 
171   // Sets the encoding of entry paths in the ZIP archive.
172   // By default, paths are assumed to be in UTF-8.
SetEncoding(std::string encoding)173   void SetEncoding(std::string encoding) { encoding_ = std::move(encoding); }
174 
175   // Sets the decryption password that will be used to decrypt encrypted file in
176   // the ZIP archive.
SetPassword(std::string password)177   void SetPassword(std::string password) { password_ = std::move(password); }
178 
179   // Gets the next entry. Returns null if there is no more entry, or if an error
180   // occurred while scanning entries. The returned Entry is owned by this
181   // ZipReader, and is valid until Next() is called again or until this
182   // ZipReader is closed.
183   //
184   // This function should be called before operations over the current entry
185   // like ExtractCurrentEntryToFile().
186   //
187   // while (const ZipReader::Entry* entry = reader.Next()) {
188   //   // Do something with the current entry here.
189   //   ...
190   // }
191   //
192   // // Finished scanning entries.
193   // // Check if the scanning stopped because of an error.
194   // if (!reader.ok()) {
195   //   // There was an error.
196   //   ...
197   // }
198   const Entry* Next();
199 
200   // Returns true if the enumeration of entries was successful, or false if it
201   // stopped because of an error.
ok()202   bool ok() const { return ok_; }
203 
204   // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
205   // starting from the beginning of the entry.
206   //
207   // Returns true if the entire file was extracted without error.
208   //
209   // Precondition: Next() returned a non-null Entry.
210   bool ExtractCurrentEntry(WriterDelegate* delegate,
211                            uint64_t num_bytes_to_extract =
212                                std::numeric_limits<uint64_t>::max()) const;
213 
214   // Extracts the current entry to |delegate|, starting from the beginning
215   // of the entry, calling |listener_callback| regularly with the number of
216   // bytes extracted.
217   //
218   // Returns true if the entire file was extracted without error.
219   //
220   // Precondition: Next() returned a non-null Entry.
221   bool ExtractCurrentEntryWithListener(
222       WriterDelegate* delegate,
223       ListenerCallback listener_callback) const;
224 
225   // Asynchronously extracts the current entry to the given output file path. If
226   // the current entry is a directory it just creates the directory
227   // synchronously instead.
228   //
229   // |success_callback| will be called on success and |failure_callback| will be
230   // called on failure. |progress_callback| will be called at least once.
231   // Callbacks will be posted to the current MessageLoop in-order.
232   //
233   // Precondition: Next() returned a non-null Entry.
234   void ExtractCurrentEntryToFilePathAsync(
235       const base::FilePath& output_file_path,
236       SuccessCallback success_callback,
237       FailureCallback failure_callback,
238       ProgressCallback progress_callback);
239 
240   // Extracts the current entry into memory. If the current entry is a
241   // directory, |*output| is set to the empty string. If the current entry is a
242   // file, |*output| is filled with its contents.
243   //
244   // The value in |Entry::original_size| cannot be trusted, so the real size of
245   // the uncompressed contents can be different. |max_read_bytes| limits the
246   // amount of memory used to carry the entry.
247   //
248   // Returns true if the entire content is read without error. If the content is
249   // bigger than |max_read_bytes|, this function returns false and |*output| is
250   // filled with |max_read_bytes| of data. If an error occurs, this function
251   // returns false and |*output| contains the content extracted so far, which
252   // might be garbage data.
253   //
254   // Precondition: Next() returned a non-null Entry.
255   bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
256                                    std::string* output) const;
257 
ExtractCurrentEntryToString(std::string * output)258   bool ExtractCurrentEntryToString(std::string* output) const {
259     return ExtractCurrentEntryToString(
260         base::checked_cast<uint64_t>(output->max_size()), output);
261   }
262 
263   // Returns the number of entries in the ZIP archive.
264   //
265   // Precondition: one of the Open() methods returned true.
num_entries()266   int num_entries() const { return num_entries_; }
267 
268  private:
269   // Common code used both in Open and OpenFromFd.
270   bool OpenInternal();
271 
272   // Resets the internal state.
273   void Reset();
274 
275   // Opens the current entry in the ZIP archive. On success, returns true and
276   // updates the current entry state |entry_|.
277   //
278   // Note that there is no matching CloseEntry(). The current entry state is
279   // reset automatically as needed.
280   bool OpenEntry();
281 
282   // Normalizes the given path passed as UTF-16 string piece. Sets entry_.path,
283   // entry_.is_directory and entry_.is_unsafe.
284   void Normalize(base::StringPiece16 in);
285 
286   // Runs the ListenerCallback at a throttled rate.
287   void ReportProgress(ListenerCallback listener_callback, uint64_t bytes) const;
288 
289   // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
290   // starting from the beginning of the entry calling |listener_callback| if
291   // its supplied.
292   //
293   // Returns true if the entire file was extracted without error.
294   //
295   // Precondition: Next() returned a non-null Entry.
296   bool ExtractCurrentEntry(WriterDelegate* delegate,
297                            ListenerCallback listener_callback,
298                            uint64_t num_bytes_to_extract =
299                                std::numeric_limits<uint64_t>::max()) const;
300 
301   // Extracts a chunk of the file to the target.  Will post a task for the next
302   // chunk and success/failure/progress callbacks as necessary.
303   void ExtractChunk(base::File target_file,
304                     SuccessCallback success_callback,
305                     FailureCallback failure_callback,
306                     ProgressCallback progress_callback,
307                     const int64_t offset);
308 
309   std::string encoding_;
310   std::string password_;
311   unzFile zip_file_;
312   int num_entries_;
313   int next_index_;
314   bool reached_end_;
315   bool ok_;
316   Entry entry_;
317 
318   // Next time to report progress.
319   mutable base::TimeTicks next_progress_report_time_ = base::TimeTicks::Now();
320 
321   // Progress time delta.
322   // TODO(crbug.com/953256) Add this as parameter to the unzip options.
323   base::TimeDelta progress_period_ = base::Milliseconds(1000);
324 
325   // Number of bytes read since last progress report callback executed.
326   mutable uint64_t delta_bytes_read_ = 0;
327 
328   base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
329 };
330 
331 // A writer delegate that writes to a given File. It is recommended that this
332 // file be initially empty.
333 class FileWriterDelegate : public WriterDelegate {
334  public:
335   // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
336   // not own |file|, therefore the caller must guarantee |file| will outlive the
337   // delegate.
338   explicit FileWriterDelegate(base::File* file);
339 
340   // Constructs a FileWriterDelegate that takes ownership of |file|.
341   explicit FileWriterDelegate(base::File owned_file);
342 
343   FileWriterDelegate(const FileWriterDelegate&) = delete;
344   FileWriterDelegate& operator=(const FileWriterDelegate&) = delete;
345 
346   ~FileWriterDelegate() override;
347 
348   // Returns true if the file handle passed to the constructor is valid.
349   bool PrepareOutput() override;
350 
351   // Writes |num_bytes| bytes of |data| to the file, returning false on error or
352   // if not all bytes could be written.
353   bool WriteBytes(const char* data, int num_bytes) override;
354 
355   // Sets the last-modified time of the data.
356   void SetTimeModified(const base::Time& time) override;
357 
358   // On POSIX systems, sets the file to be executable if the source file was
359   // executable.
360   void SetPosixFilePermissions(int mode) override;
361 
362   // Empties the file to avoid leaving garbage data in it.
363   void OnError() override;
364 
365   // Gets the number of bytes written into the file.
file_length()366   int64_t file_length() { return file_length_; }
367 
368  protected:
369   // The delegate can optionally own the file it modifies, in which case
370   // owned_file_ is set and file_ is an alias for owned_file_.
371   base::File owned_file_;
372 
373   // The file the delegate modifies.
374   base::File* const file_ = &owned_file_;
375 
376   int64_t file_length_ = 0;
377 };
378 
379 // A writer delegate that creates and writes a file at a given path. This does
380 // not overwrite any existing file.
381 class FilePathWriterDelegate : public FileWriterDelegate {
382  public:
383   explicit FilePathWriterDelegate(base::FilePath output_file_path);
384 
385   FilePathWriterDelegate(const FilePathWriterDelegate&) = delete;
386   FilePathWriterDelegate& operator=(const FilePathWriterDelegate&) = delete;
387 
388   ~FilePathWriterDelegate() override;
389 
390   // Creates the output file and any necessary intermediate directories. Does
391   // not overwrite any existing file, and returns false if the output file
392   // cannot be created because another file conflicts with it.
393   bool PrepareOutput() override;
394 
395   // Deletes the output file.
396   void OnError() override;
397 
398  private:
399   const base::FilePath output_file_path_;
400 };
401 
402 }  // namespace zip
403 
404 #endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
405