xref: /aosp_15_r20/external/puffin/src/main.cc (revision 07fb1d065b7cfb4729786fadd42a612532d2f466)
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <fstream>
7 #include <iostream>
8 #include <sstream>
9 
10 #ifdef USE_BRILLO
11 #include "brillo/flag_helper.h"
12 #else
13 #include "gflags/gflags.h"
14 #endif
15 
16 #include "puffin/file_stream.h"
17 #include "puffin/memory_stream.h"
18 #include "puffin/src/extent_stream.h"
19 #include "puffin/src/include/puffin/common.h"
20 #include "puffin/src/include/puffin/huffer.h"
21 #include "puffin/src/include/puffin/puffdiff.h"
22 #include "puffin/src/include/puffin/puffer.h"
23 #include "puffin/src/include/puffin/puffpatch.h"
24 #include "puffin/src/include/puffin/utils.h"
25 #include "puffin/src/logging.h"
26 #include "puffin/src/puffin_stream.h"
27 
28 using puffin::BitExtent;
29 using puffin::Buffer;
30 using puffin::ByteExtent;
31 using puffin::ExtentStream;
32 using puffin::FileStream;
33 using puffin::Huffer;
34 using puffin::MemoryStream;
35 using puffin::Puffer;
36 using puffin::PuffinStream;
37 using puffin::UniqueStreamPtr;
38 using std::string;
39 using std::stringstream;
40 using std::vector;
41 
42 namespace {
43 
// Separators of the textual extent list format "offset:length,offset:length".
constexpr char kExtentDelimeter = ',';
constexpr char kOffsetLengthDelimeter = ':';

// Parses |str|, a list of the form "offset:length,offset:length,...", into a
// vector of |T|. Every element is constructed in place from the two parsed
// unsigned values, offset first and length second. An empty |str| produces an
// empty vector. The numbers are converted with std::stoull, so a missing or
// non-numeric field surfaces as whatever std::stoull raises; any text after
// the second field of an item is ignored.
template <typename T>
std::vector<T> StringToExtents(const std::string& str) {
  std::vector<T> result;
  if (str.empty()) {
    return result;
  }

  std::stringstream list_stream(str);
  for (std::string item; std::getline(list_stream, item, kExtentDelimeter);) {
    // Split a single "offset:length" item into its two fields.
    std::stringstream item_stream(item);
    std::string offset_text;
    std::string length_text;
    std::getline(item_stream, offset_text, kOffsetLengthDelimeter);
    std::getline(item_stream, length_text, kOffsetLengthDelimeter);
    result.emplace_back(std::stoull(offset_text), std::stoull(length_text));
  }
  return result;
}
63 
// Default value of the cache_size flag: 50 * 1024 * 1024 bytes.
constexpr uint64_t kDefaultPuffCacheSize = 50ULL * 1024 * 1024;
65 
// The kinds of compressed containers this tool distinguishes.
enum class FileType { kDeflate, kZlib, kGzip, kZip, kRaw, kUnknown };

// Maps |file_type| (normally the final extension of a file name) to the
// corresponding FileType. The comparison is exact and case-sensitive; any
// string that is not listed below maps to FileType::kUnknown.
FileType StringToFileType(const std::string& file_type) {
  struct Mapping {
    const char* name;
    FileType type;
  };
  static constexpr Mapping kMappings[] = {
      {"raw", FileType::kRaw},   {"deflate", FileType::kDeflate},
      {"zlib", FileType::kZlib}, {"gzip", FileType::kGzip},
      {"gz", FileType::kGzip},   {"tgz", FileType::kGzip},
      {"zip", FileType::kZip},   {"apk", FileType::kZip},
      {"jar", FileType::kZip},
  };

  for (const Mapping& mapping : kMappings) {
    if (file_type == mapping.name) {
      return mapping.type;
    }
  }
  return FileType::kUnknown;
}
86 
87 // Finds the location of deflates in |stream|. If |file_type_to_override| is
88 // non-empty, it infers the file type based on that, otherwise, it infers the
89 // file type based on the final extension of |file_name|. It returns false if
90 // file type cannot be inferred from any of the input arguments. |deflates|
91 // is filled with byte-aligned location of deflates.
LocateDeflatesBasedOnFileType(const UniqueStreamPtr & stream,const string & file_name,const string & file_type_to_override,vector<BitExtent> * deflates)92 bool LocateDeflatesBasedOnFileType(const UniqueStreamPtr& stream,
93                                    const string& file_name,
94                                    const string& file_type_to_override,
95                                    vector<BitExtent>* deflates) {
96   auto file_type = FileType::kUnknown;
97 
98   auto last_dot = file_name.find_last_of(".");
99   if (last_dot == string::npos) {
100     // Could not find a dot so we assume there is no extension.
101     return false;
102   }
103   auto extension = file_name.substr(last_dot + 1);
104   file_type = StringToFileType(extension);
105 
106   if (!file_type_to_override.empty()) {
107     auto override_file_type = StringToFileType(file_type_to_override);
108     if (override_file_type == FileType::kUnknown) {
109       LOG(ERROR) << "Overriden file type " << file_type_to_override
110                  << " does not exist.";
111       return false;
112     }
113     if (file_type != FileType::kUnknown && file_type != override_file_type) {
114       LOG(WARNING) << "Based on the file name, the file type is " << extension
115                    << ", But the overriden file type is "
116                    << file_type_to_override << ". Is this intentional?";
117     }
118     file_type = override_file_type;
119   }
120 
121   if (file_type == FileType::kRaw) {
122     // Do not need to populate |deflates|.
123     return true;
124   }
125 
126   uint64_t stream_size;
127   TEST_AND_RETURN_FALSE(stream->GetSize(&stream_size));
128   Buffer data(stream_size);
129   TEST_AND_RETURN_FALSE(stream->Read(data.data(), data.size()));
130   switch (file_type) {
131     case FileType::kDeflate:
132       TEST_AND_RETURN_FALSE(puffin::LocateDeflatesInDeflateStream(
133           data.data(), data.size(), 0, deflates, nullptr));
134       break;
135     case FileType::kZlib:
136       TEST_AND_RETURN_FALSE(puffin::LocateDeflatesInZlib(data, deflates));
137       break;
138     case FileType::kGzip:
139       TEST_AND_RETURN_FALSE(puffin::LocateDeflatesInGzip(data, deflates));
140       break;
141     case FileType::kZip:
142       TEST_AND_RETURN_FALSE(puffin::LocateDeflatesInZipArchive(data, deflates));
143       break;
144     default:
145       LOG(ERROR) << "Unknown file type: (" << file_type_to_override << ") nor ("
146                  << extension << ").";
147       return false;
148   }
149   // Return the stream to its zero offset in case we used it.
150   TEST_AND_RETURN_FALSE(stream->Seek(0));
151 
152   return true;
153 }
154 
155 }  // namespace
156 
157 #define SETUP_FLAGS                                                          \
158   DEFINE_string(src_file, "", "Source file");                                \
159   DEFINE_string(dst_file, "", "Target file");                                \
160   DEFINE_string(patch_file, "", "patch file");                               \
161   DEFINE_string(                                                             \
162       src_deflates_byte, "",                                                 \
163       "Source deflate byte locations in the format offset:length,...");      \
164   DEFINE_string(                                                             \
165       dst_deflates_byte, "",                                                 \
166       "Target deflate byte locations in the format offset:length,...");      \
167   DEFINE_string(                                                             \
168       src_deflates_bit, "",                                                  \
169       "Source deflate bit locations in the format offset:length,...");       \
170   DEFINE_string(                                                             \
171       dst_deflates_bit, "",                                                  \
172       "Target deflatebit locations in the format offset:length,...");        \
173   DEFINE_string(src_puffs, "",                                               \
174                 "Source puff locations in the format offset:length,...");    \
175   DEFINE_string(dst_puffs, "",                                               \
176                 "Target puff locations in the format offset:length,...");    \
177   DEFINE_string(src_extents, "",                                             \
178                 "Source extents in the format of offset:length,...");        \
179   DEFINE_string(dst_extents, "",                                             \
180                 "Target extents in the format of offset:length,...");        \
181   DEFINE_string(operation, "",                                               \
182                 "Type of the operation: puff, huff, puffdiff, puffpatch, "   \
183                 "puffhuff");                                                 \
184   DEFINE_string(src_file_type, "",                                           \
185                 "Type of the input source file: deflate, gzip, "             \
186                 "zlib or zip");                                              \
187   DEFINE_string(dst_file_type, "",                                           \
188                 "Same as src_file_type but for the target file");            \
189   DEFINE_bool(verbose, false,                                                \
190               "Logs all the given parameters including internally "          \
191               "generated ones");                                             \
192   DEFINE_uint64(cache_size, kDefaultPuffCacheSize,                           \
193                 "Maximum size to cache the puff stream. Used in puffpatch"); \
194   DEFINE_int32(patch_algorithm, 0,                                           \
195                "Type of raw diff algorithm to use. The current supported "   \
196                "ones are 0: bsdiff, 1: zucchini.");
197 #ifndef USE_BRILLO
198 SETUP_FLAGS;
199 #endif
200 
201 // Main entry point to the application.
Main(int argc,char ** argv)202 bool Main(int argc, char** argv) {
203 #ifdef USE_BRILLO
204   SETUP_FLAGS;
205   brillo::FlagHelper::Init(argc, argv, "Puffin tool");
206 #else
207   // google::InitGoogleLogging(argv[0]);
208   google::ParseCommandLineFlags(&argc, &argv, true);
209 #endif
210 
211   TEST_AND_RETURN_FALSE(!FLAGS_operation.empty());
212   TEST_AND_RETURN_FALSE(!FLAGS_src_file.empty());
213   TEST_AND_RETURN_FALSE(!FLAGS_dst_file.empty());
214 
215   auto src_deflates_byte = StringToExtents<ByteExtent>(FLAGS_src_deflates_byte);
216   auto dst_deflates_byte = StringToExtents<ByteExtent>(FLAGS_dst_deflates_byte);
217   auto src_deflates_bit = StringToExtents<BitExtent>(FLAGS_src_deflates_bit);
218   auto dst_deflates_bit = StringToExtents<BitExtent>(FLAGS_dst_deflates_bit);
219   auto src_puffs = StringToExtents<ByteExtent>(FLAGS_src_puffs);
220   auto dst_puffs = StringToExtents<ByteExtent>(FLAGS_dst_puffs);
221   auto src_extents = StringToExtents<ByteExtent>(FLAGS_src_extents);
222   auto dst_extents = StringToExtents<ByteExtent>(FLAGS_dst_extents);
223 
224   auto src_stream = FileStream::Open(FLAGS_src_file, true, false);
225   TEST_AND_RETURN_FALSE(src_stream);
226   if (!src_extents.empty()) {
227     src_stream =
228         ExtentStream::CreateForRead(std::move(src_stream), src_extents);
229     TEST_AND_RETURN_FALSE(src_stream);
230   }
231 
232   if (FLAGS_operation == "puff" || FLAGS_operation == "puffhuff") {
233     TEST_AND_RETURN_FALSE(LocateDeflatesBasedOnFileType(
234         src_stream, FLAGS_src_file, FLAGS_src_file_type, &src_deflates_bit));
235 
236     if (src_deflates_bit.empty() && src_deflates_byte.empty()) {
237       LOG(WARNING) << "You should pass source deflates, is this intentional?";
238     }
239     if (src_deflates_bit.empty()) {
240       TEST_AND_RETURN_FALSE(FindDeflateSubBlocks(src_stream, src_deflates_byte,
241                                                  &src_deflates_bit));
242     }
243     TEST_AND_RETURN_FALSE(dst_puffs.empty());
244     uint64_t dst_puff_size;
245     TEST_AND_RETURN_FALSE(FindPuffLocations(src_stream, src_deflates_bit,
246                                             &dst_puffs, &dst_puff_size));
247 
248     auto dst_stream = FileStream::Open(FLAGS_dst_file, false, true);
249     TEST_AND_RETURN_FALSE(dst_stream);
250     auto puffer = std::make_shared<Puffer>();
251     auto reader =
252         PuffinStream::CreateForPuff(std::move(src_stream), puffer,
253                                     dst_puff_size, src_deflates_bit, dst_puffs);
254 
255     Buffer puff_buffer;
256     auto writer = FLAGS_operation == "puffhuff"
257                       ? MemoryStream::CreateForWrite(&puff_buffer)
258                       : std::move(dst_stream);
259 
260     Buffer buffer(1024 * 1024);
261     uint64_t bytes_wrote = 0;
262     while (bytes_wrote < dst_puff_size) {
263       auto write_size = std::min(static_cast<uint64_t>(buffer.size()),
264                                  dst_puff_size - bytes_wrote);
265       TEST_AND_RETURN_FALSE(reader->Read(buffer.data(), write_size));
266       TEST_AND_RETURN_FALSE(writer->Write(buffer.data(), write_size));
267       bytes_wrote += write_size;
268     }
269 
270     // puffhuff operation puffs a stream and huffs it back to the target stream
271     // to make sure we can get to the original stream.
272     if (FLAGS_operation == "puffhuff") {
273       src_puffs = dst_puffs;
274       dst_deflates_byte = src_deflates_byte;
275       dst_deflates_bit = src_deflates_bit;
276 
277       auto read_puff_stream = MemoryStream::CreateForRead(puff_buffer);
278       auto huffer = std::make_shared<Huffer>();
279       auto huff_writer = PuffinStream::CreateForHuff(
280           std::move(dst_stream), huffer, dst_puff_size, dst_deflates_bit,
281           src_puffs);
282 
283       uint64_t bytes_read = 0;
284       while (bytes_read < dst_puff_size) {
285         auto read_size = std::min(static_cast<uint64_t>(buffer.size()),
286                                   dst_puff_size - bytes_read);
287         TEST_AND_RETURN_FALSE(read_puff_stream->Read(buffer.data(), read_size));
288         TEST_AND_RETURN_FALSE(huff_writer->Write(buffer.data(), read_size));
289         bytes_read += read_size;
290       }
291     }
292   } else if (FLAGS_operation == "huff") {
293     if (dst_deflates_bit.empty() && src_puffs.empty()) {
294       LOG(WARNING) << "You should pass source puffs and destination deflates"
295                    << ", is this intentional?";
296     }
297     TEST_AND_RETURN_FALSE(src_puffs.size() == dst_deflates_bit.size());
298     uint64_t src_stream_size;
299     TEST_AND_RETURN_FALSE(src_stream->GetSize(&src_stream_size));
300     auto dst_file = FileStream::Open(FLAGS_dst_file, false, true);
301     TEST_AND_RETURN_FALSE(dst_file);
302 
303     auto huffer = std::make_shared<Huffer>();
304     auto dst_stream = PuffinStream::CreateForHuff(std::move(dst_file), huffer,
305                                                   src_stream_size,
306                                                   dst_deflates_bit, src_puffs);
307 
308     Buffer buffer(1024 * 1024);
309     uint64_t bytes_read = 0;
310     while (bytes_read < src_stream_size) {
311       auto read_size = std::min(static_cast<uint64_t>(buffer.size()),
312                                 src_stream_size - bytes_read);
313       TEST_AND_RETURN_FALSE(src_stream->Read(buffer.data(), read_size));
314       TEST_AND_RETURN_FALSE(dst_stream->Write(buffer.data(), read_size));
315       bytes_read += read_size;
316     }
317   } else if (FLAGS_operation == "puffdiff") {
318     auto dst_stream = FileStream::Open(FLAGS_dst_file, true, false);
319     TEST_AND_RETURN_FALSE(dst_stream);
320 
321     TEST_AND_RETURN_FALSE(LocateDeflatesBasedOnFileType(
322         src_stream, FLAGS_src_file, FLAGS_src_file_type, &src_deflates_bit));
323     TEST_AND_RETURN_FALSE(LocateDeflatesBasedOnFileType(
324         dst_stream, FLAGS_dst_file, FLAGS_dst_file_type, &dst_deflates_bit));
325 
326     if (src_deflates_bit.empty() && src_deflates_byte.empty()) {
327       LOG(WARNING) << "You should pass source deflates, is this intentional?";
328     }
329     if (dst_deflates_bit.empty() && dst_deflates_byte.empty()) {
330       LOG(WARNING) << "You should pass target deflates, is this intentional?";
331     }
332     if (!dst_extents.empty()) {
333       dst_stream =
334           ExtentStream::CreateForWrite(std::move(dst_stream), dst_extents);
335       TEST_AND_RETURN_FALSE(dst_stream);
336     }
337 
338     if (src_deflates_bit.empty()) {
339       TEST_AND_RETURN_FALSE(FindDeflateSubBlocks(src_stream, src_deflates_byte,
340                                                  &src_deflates_bit));
341     }
342 
343     if (dst_deflates_bit.empty()) {
344       TEST_AND_RETURN_FALSE(FindDeflateSubBlocks(dst_stream, dst_deflates_byte,
345                                                  &dst_deflates_bit));
346     }
347 
348     if (FLAGS_patch_algorithm != 0 && FLAGS_patch_algorithm != 1) {
349       LOG(ERROR)
350           << "The supported patch algorithms are 0: bsdiff, 1: zucchini.";
351       return false;
352     }
353     // TODO(xunchang) add flags to select the bsdiff compressors.
354     Buffer puffdiff_delta;
355     TEST_AND_RETURN_FALSE(puffin::PuffDiff(
356         std::move(src_stream), std::move(dst_stream), src_deflates_bit,
357         dst_deflates_bit,
358         {bsdiff::CompressorType::kBZ2, bsdiff::CompressorType::kBrotli},
359         static_cast<puffin::PatchAlgorithm>(FLAGS_patch_algorithm),
360         "/tmp/patch.tmp", &puffdiff_delta));
361     if (FLAGS_verbose) {
362       LOG(INFO) << "patch_size: " << puffdiff_delta.size();
363     }
364     auto patch_stream = FileStream::Open(FLAGS_patch_file, false, true);
365     TEST_AND_RETURN_FALSE(patch_stream);
366     TEST_AND_RETURN_FALSE(
367         patch_stream->Write(puffdiff_delta.data(), puffdiff_delta.size()));
368   } else if (FLAGS_operation == "puffpatch") {
369     auto patch_stream = FileStream::Open(FLAGS_patch_file, true, false);
370     TEST_AND_RETURN_FALSE(patch_stream);
371     uint64_t patch_size;
372     TEST_AND_RETURN_FALSE(patch_stream->GetSize(&patch_size));
373 
374     Buffer puffdiff_delta(patch_size);
375     TEST_AND_RETURN_FALSE(
376         patch_stream->Read(puffdiff_delta.data(), puffdiff_delta.size()));
377     auto dst_stream = FileStream::Open(FLAGS_dst_file, false, true);
378     TEST_AND_RETURN_FALSE(dst_stream);
379     if (!dst_extents.empty()) {
380       dst_stream =
381           ExtentStream::CreateForWrite(std::move(dst_stream), dst_extents);
382       TEST_AND_RETURN_FALSE(dst_stream);
383     }
384     // Apply the patch. Use 50MB cache, it should be enough for most of the
385     // operations.
386     TEST_AND_RETURN_FALSE(puffin::PuffPatch(
387         std::move(src_stream), std::move(dst_stream), puffdiff_delta.data(),
388         puffdiff_delta.size(), FLAGS_cache_size));
389   }
390 
391   if (FLAGS_verbose) {
392     LOG(INFO) << "src_deflates_byte: "
393               << puffin::ExtentsToString(src_deflates_byte);
394     LOG(INFO) << "dst_deflates_byte: "
395               << puffin::ExtentsToString(dst_deflates_byte);
396     LOG(INFO) << "src_deflates_bit: "
397               << puffin::ExtentsToString(src_deflates_bit);
398     LOG(INFO) << "dst_deflates_bit: "
399               << puffin::ExtentsToString(dst_deflates_bit);
400     LOG(INFO) << "src_puffs: " << puffin::ExtentsToString(src_puffs);
401     LOG(INFO) << "dst_puffs: " << puffin::ExtentsToString(dst_puffs);
402     LOG(INFO) << "src_extents: " << puffin::ExtentsToString(src_extents);
403     LOG(INFO) << "dst_extents: " << puffin::ExtentsToString(dst_extents);
404   }
405   return true;
406 }
407 
main(int argc,char ** argv)408 int main(int argc, char** argv) {
409   if (!Main(argc, argv)) {
410     return 1;
411   }
412   return 0;
413 }
414