1*a3a45f30SXin Li // Copyright 2017 The Chromium OS Authors. All rights reserved. 2*a3a45f30SXin Li // Use of this source code is governed by a BSD-style license that can be 3*a3a45f30SXin Li // found in the LICENSE file. 4*a3a45f30SXin Li 5*a3a45f30SXin Li #ifndef _BSDIFF_ENDSLEY_PATCH_WRITER_H_ 6*a3a45f30SXin Li #define _BSDIFF_ENDSLEY_PATCH_WRITER_H_ 7*a3a45f30SXin Li 8*a3a45f30SXin Li #include <memory> 9*a3a45f30SXin Li #include <string> 10*a3a45f30SXin Li #include <vector> 11*a3a45f30SXin Li 12*a3a45f30SXin Li #include "bsdiff/compressor_interface.h" 13*a3a45f30SXin Li #include "bsdiff/constants.h" 14*a3a45f30SXin Li #include "bsdiff/patch_writer_interface.h" 15*a3a45f30SXin Li 16*a3a45f30SXin Li namespace bsdiff { 17*a3a45f30SXin Li 18*a3a45f30SXin Li // A PatchWriterInterface class compatible with the format used by Android Play 19*a3a45f30SXin Li // Store's bsdiff implementation, which is based on Matthew Endsley's bsdiff 20*a3a45f30SXin Li // implementation. See https://github.com/mendsley/bsdiff for the original 21*a3a45f30SXin Li // implementation of this format. See also Google's APK patch size estimator for 22*a3a45f30SXin Li // more information on the file-by-file format used by Play Store: 23*a3a45f30SXin Li // https://github.com/googlesamples/apk-patch-size-estimator 24*a3a45f30SXin Li 25*a3a45f30SXin Li // This format, identified by the "ENDSLEY/BSDIFF43" magic string, uses a single 26*a3a45f30SXin Li // stream with the control entries, diff data and extra data interleaved. After 27*a3a45f30SXin Li // the header, each Control Entry is stored in 24 bytes followed by the diff 28*a3a45f30SXin Li // stream data for that entry only, and then followed by the extra stream data 29*a3a45f30SXin Li // for that entry only. The format doesn't handle the compression of the data, 30*a3a45f30SXin Li // instead, the whole file (including the magic string) is compressed with any 31*a3a45f30SXin Li // compression algorithm. 32*a3a45f30SXin Li 33*a3a45f30SXin Li // This format is easier to parse and allows the patch to be streamed, but by 34*a3a45f30SXin Li // mixing the diff and extra data into the same compression context offers a 35*a3a45f30SXin Li // slightly worse compression ratio (about 3.5% compared to upstream's format). 36*a3a45f30SXin Li 37*a3a45f30SXin Li class EndsleyPatchWriter : public PatchWriterInterface { 38*a3a45f30SXin Li public: 39*a3a45f30SXin Li // Create the patch writer that will write the data to the passed vector 40*a3a45f30SXin Li // |patch|, resizing it as needed. The |patch| vector must be valid until 41*a3a45f30SXin Li // Close() is called or this patch is destroyed. The data in |patch| will be 42*a3a45f30SXin Li // compressed using the compressor type |type|. EndsleyPatchWriter(std::vector<uint8_t> * patch,CompressorType type,int brotli_quality)43*a3a45f30SXin Li EndsleyPatchWriter(std::vector<uint8_t>* patch, 44*a3a45f30SXin Li CompressorType type, 45*a3a45f30SXin Li int brotli_quality) 46*a3a45f30SXin Li : patch_(patch), 47*a3a45f30SXin Li compressor_type_(type), 48*a3a45f30SXin Li brotli_quality_(brotli_quality) {} 49*a3a45f30SXin Li 50*a3a45f30SXin Li // PatchWriterInterface overrides. 51*a3a45f30SXin Li bool Init(size_t new_size) override; 52*a3a45f30SXin Li bool WriteDiffStream(const uint8_t* data, size_t size) override; 53*a3a45f30SXin Li bool WriteExtraStream(const uint8_t* data, size_t size) override; 54*a3a45f30SXin Li bool AddControlEntry(const ControlEntry& entry) override; 55*a3a45f30SXin Li bool Close() override; 56*a3a45f30SXin Li 57*a3a45f30SXin Li private: 58*a3a45f30SXin Li // Emit at the end of the |patch_| vector the passed control entry. 59*a3a45f30SXin Li void EmitControlEntry(const ControlEntry& entry); 60*a3a45f30SXin Li 61*a3a45f30SXin Li // Emit at the end of the |patch_| vector the passed buffer. 62*a3a45f30SXin Li void EmitBuffer(const uint8_t* data, size_t size); 63*a3a45f30SXin Li 64*a3a45f30SXin Li // Flush as much as possible of the pending data. 65*a3a45f30SXin Li void Flush(); 66*a3a45f30SXin Li 67*a3a45f30SXin Li // The vector we are writing to, owned by the caller. 68*a3a45f30SXin Li std::vector<uint8_t>* patch_; 69*a3a45f30SXin Li 70*a3a45f30SXin Li // The compressor type to use and its quality (if any). 71*a3a45f30SXin Li CompressorType compressor_type_; 72*a3a45f30SXin Li int brotli_quality_; 73*a3a45f30SXin Li 74*a3a45f30SXin Li std::unique_ptr<CompressorInterface> compressor_; 75*a3a45f30SXin Li 76*a3a45f30SXin Li // The pending diff and extra data to be encoded in the file. These vectors 77*a3a45f30SXin Li // would not be used whenever is possible to the data directly to the patch_ 78*a3a45f30SXin Li // vector; namely when the control, diff and extra stream data are provided in 79*a3a45f30SXin Li // that order for each control entry. 80*a3a45f30SXin Li std::vector<uint8_t> diff_data_; 81*a3a45f30SXin Li std::vector<uint8_t> extra_data_; 82*a3a45f30SXin Li std::vector<ControlEntry> control_; 83*a3a45f30SXin Li 84*a3a45f30SXin Li // Defined as the sum of all the diff_size and extra_size values in 85*a3a45f30SXin Li // |control_|. This is used to determine whether it is worth Flushing the 86*a3a45f30SXin Li // pending data. 87*a3a45f30SXin Li size_t pending_control_data_{0}; 88*a3a45f30SXin Li 89*a3a45f30SXin Li // Number of bytes in the diff and extra stream that are pending in the 90*a3a45f30SXin Li // last control entry encoded in the |patch_|. If both are zero the last 91*a3a45f30SXin Li // control entry was completely emitted. 92*a3a45f30SXin Li size_t pending_diff_{0}; 93*a3a45f30SXin Li size_t pending_extra_{0}; 94*a3a45f30SXin Li }; 95*a3a45f30SXin Li 96*a3a45f30SXin Li } // namespace bsdiff 97*a3a45f30SXin Li 98*a3a45f30SXin Li #endif // _BSDIFF_ENDSLEY_PATCH_WRITER_H_ 99