xref: /aosp_15_r20/external/bsdiff/endsley_patch_writer.h (revision a3a45f308bd90ef1a6e6a5e8fb92fe449b895909)
1*a3a45f30SXin Li // Copyright 2017 The Chromium OS Authors. All rights reserved.
2*a3a45f30SXin Li // Use of this source code is governed by a BSD-style license that can be
3*a3a45f30SXin Li // found in the LICENSE file.
4*a3a45f30SXin Li 
5*a3a45f30SXin Li #ifndef _BSDIFF_ENDSLEY_PATCH_WRITER_H_
6*a3a45f30SXin Li #define _BSDIFF_ENDSLEY_PATCH_WRITER_H_
7*a3a45f30SXin Li 
8*a3a45f30SXin Li #include <memory>
9*a3a45f30SXin Li #include <string>
10*a3a45f30SXin Li #include <vector>
11*a3a45f30SXin Li 
12*a3a45f30SXin Li #include "bsdiff/compressor_interface.h"
13*a3a45f30SXin Li #include "bsdiff/constants.h"
14*a3a45f30SXin Li #include "bsdiff/patch_writer_interface.h"
15*a3a45f30SXin Li 
16*a3a45f30SXin Li namespace bsdiff {
17*a3a45f30SXin Li 
// A PatchWriterInterface class compatible with the format used by Android Play
// Store's bsdiff implementation, which is based on Matthew Endsley's bsdiff
// implementation. See https://github.com/mendsley/bsdiff for the original
// implementation of this format. See also Google's APK patch size estimator for
// more information on the file-by-file format used by Play Store:
// https://github.com/googlesamples/apk-patch-size-estimator
24*a3a45f30SXin Li 
// This format, identified by the "ENDSLEY/BSDIFF43" magic string, uses a single
// stream with the control entries, diff data and extra data interleaved. After
// the header, each Control Entry is stored in 24 bytes followed by the diff
// stream data for that entry only, and then followed by the extra stream data
// for that entry only. The format doesn't handle the compression of the data;
// instead, the whole file (including the magic string) is compressed with any
// compression algorithm.
32*a3a45f30SXin Li 
// This format is easier to parse and allows the patch to be streamed, but
// mixing the diff and extra data into the same compression context yields a
// slightly worse compression ratio (about 3.5% worse than upstream's format).
36*a3a45f30SXin Li 
37*a3a45f30SXin Li class EndsleyPatchWriter : public PatchWriterInterface {
38*a3a45f30SXin Li  public:
39*a3a45f30SXin Li   // Create the patch writer that will write the data to the passed vector
40*a3a45f30SXin Li   // |patch|, resizing it as needed. The |patch| vector must be valid until
41*a3a45f30SXin Li   // Close() is called or this patch is destroyed. The data in |patch| will be
42*a3a45f30SXin Li   // compressed using the compressor type |type|.
EndsleyPatchWriter(std::vector<uint8_t> * patch,CompressorType type,int brotli_quality)43*a3a45f30SXin Li   EndsleyPatchWriter(std::vector<uint8_t>* patch,
44*a3a45f30SXin Li                      CompressorType type,
45*a3a45f30SXin Li                      int brotli_quality)
46*a3a45f30SXin Li       : patch_(patch),
47*a3a45f30SXin Li         compressor_type_(type),
48*a3a45f30SXin Li         brotli_quality_(brotli_quality) {}
49*a3a45f30SXin Li 
50*a3a45f30SXin Li   // PatchWriterInterface overrides.
51*a3a45f30SXin Li   bool Init(size_t new_size) override;
52*a3a45f30SXin Li   bool WriteDiffStream(const uint8_t* data, size_t size) override;
53*a3a45f30SXin Li   bool WriteExtraStream(const uint8_t* data, size_t size) override;
54*a3a45f30SXin Li   bool AddControlEntry(const ControlEntry& entry) override;
55*a3a45f30SXin Li   bool Close() override;
56*a3a45f30SXin Li 
57*a3a45f30SXin Li  private:
58*a3a45f30SXin Li   // Emit at the end of the |patch_| vector the passed control entry.
59*a3a45f30SXin Li   void EmitControlEntry(const ControlEntry& entry);
60*a3a45f30SXin Li 
61*a3a45f30SXin Li   // Emit at the end of the |patch_| vector the passed buffer.
62*a3a45f30SXin Li   void EmitBuffer(const uint8_t* data, size_t size);
63*a3a45f30SXin Li 
64*a3a45f30SXin Li   // Flush as much as possible of the pending data.
65*a3a45f30SXin Li   void Flush();
66*a3a45f30SXin Li 
67*a3a45f30SXin Li   // The vector we are writing to, owned by the caller.
68*a3a45f30SXin Li   std::vector<uint8_t>* patch_;
69*a3a45f30SXin Li 
70*a3a45f30SXin Li   // The compressor type to use and its quality (if any).
71*a3a45f30SXin Li   CompressorType compressor_type_;
72*a3a45f30SXin Li   int brotli_quality_;
73*a3a45f30SXin Li 
74*a3a45f30SXin Li   std::unique_ptr<CompressorInterface> compressor_;
75*a3a45f30SXin Li 
76*a3a45f30SXin Li   // The pending diff and extra data to be encoded in the file. These vectors
77*a3a45f30SXin Li   // would not be used whenever is possible to the data directly to the patch_
78*a3a45f30SXin Li   // vector; namely when the control, diff and extra stream data are provided in
79*a3a45f30SXin Li   // that order for each control entry.
80*a3a45f30SXin Li   std::vector<uint8_t> diff_data_;
81*a3a45f30SXin Li   std::vector<uint8_t> extra_data_;
82*a3a45f30SXin Li   std::vector<ControlEntry> control_;
83*a3a45f30SXin Li 
84*a3a45f30SXin Li   // Defined as the sum of all the diff_size and extra_size values in
85*a3a45f30SXin Li   // |control_|. This is used to determine whether it is worth Flushing the
86*a3a45f30SXin Li   // pending data.
87*a3a45f30SXin Li   size_t pending_control_data_{0};
88*a3a45f30SXin Li 
89*a3a45f30SXin Li   // Number of bytes in the diff and extra stream that are pending in the
90*a3a45f30SXin Li   // last control entry encoded in the |patch_|. If both are zero the last
91*a3a45f30SXin Li   // control entry was completely emitted.
92*a3a45f30SXin Li   size_t pending_diff_{0};
93*a3a45f30SXin Li   size_t pending_extra_{0};
94*a3a45f30SXin Li };
95*a3a45f30SXin Li 
96*a3a45f30SXin Li }  // namespace bsdiff
97*a3a45f30SXin Li 
98*a3a45f30SXin Li #endif  // _BSDIFF_ENDSLEY_PATCH_WRITER_H_
99