1 // Copyright 2017 The ChromiumOS Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef SRC_PUFFIN_STREAM_H_ 6 #define SRC_PUFFIN_STREAM_H_ 7 8 #include <filesystem> 9 #include <list> 10 #include <memory> 11 #include <set> 12 #include <unordered_map> 13 #include <utility> 14 #include <vector> 15 16 #include "puffin/src/include/puffin/common.h" 17 #include "puffin/src/include/puffin/huffer.h" 18 #include "puffin/src/include/puffin/puffer.h" 19 #include "puffin/src/include/puffin/stream.h" 20 21 namespace puffin { 22 23 class LRUCache { 24 public: 25 using Key = int; 26 using Value = std::shared_ptr<Buffer>; 27 typedef typename std::pair<Key, Value> key_value_pair_t; 28 typedef typename std::list<key_value_pair_t>::iterator iterator; 29 30 LRUCache(size_t max_size); 31 ~LRUCache(); 32 LRUCache(const LRUCache&) = delete; 33 34 void put(const Key& key, const Value& value); 35 36 bool EvictLRUItem(); 37 38 // Ensure that cache has sufficient space for a new item of |size| bytes 39 void EnsureSpace(size_t size); 40 41 const Value get(const Key& key); 42 exists(const Key & key)43 bool exists(const Key& key) const { 44 return items_map_.find(key) != items_map_.end(); 45 } 46 size()47 size_t size() const { return cache_size_; } capacity()48 size_t capacity() const { return max_size_; } 49 50 private: 51 bool WriteToDisk(const Key& key, const Value& value); 52 Value ReadFromDisk(const Key& key); 53 std::list<key_value_pair_t> items_list_; 54 std::unordered_map<Key, iterator> items_map_; 55 size_t cache_size_ = 0; 56 size_t max_size_; 57 std::filesystem::path tmpdir_; 58 std::set<Key> ondisk_items_; 59 }; 60 61 // A class for puffing a deflate stream and huffing into a deflate stream. The 62 // puff stream is "imaginary", which means it doesn't really exists; It is build 63 // and used on demand. This class uses a given deflate stream, and puffs the 64 // deflate buffers in the stream as needed or vice versa. An object of this 65 // class can be used for reading and writing puff data but should not be used 66 // for both reading and writing using the same instance. In theory we can 67 // separate this class into two classes, namely |PuffStream| and |HuffStream|, 68 // but they are sharing a lot of codes which might be inconvenient and 69 // unnecessary to do so. In this implementation, there is no protection against 70 // reading and writing at the same time. 71 class PuffinStream : public StreamInterface { 72 public: 73 ~PuffinStream() override = default; 74 75 // Creates a |PuffinStream| for reading puff buffers from a deflate stream. 76 // |stream| IN The deflate stream. 77 // |puffer| IN The |Puffer| used for puffing the stream. 78 // |puff_size| IN The size of the puff stream (assuming |stream| has been 79 // completely puffed. 80 // |deflates| IN The location of deflates in |stream|. 81 // |puffs| IN The location of puffs into the final puff stream. 82 // |max_cache_size| IN The amount of memory to use for caching puff buffers. 83 // If the mount is smaller than the maximum puff buffer 84 // size in |puffs|, then its value will be set to zero 85 // and no puff will be cached. 86 static UniqueStreamPtr CreateForPuff(UniqueStreamPtr stream, 87 std::shared_ptr<Puffer> puffer, 88 uint64_t puff_size, 89 const std::vector<BitExtent>& deflates, 90 const std::vector<ByteExtent>& puffs, 91 size_t max_cache_size = 0); 92 93 // Creates a |PuffinStream| for writing puff buffers into a deflate stream. 94 // |stream| IN The deflate stream. 95 // |huffer| IN The |Huffer| used for huffing into the |stream|. 96 // |puff_size| IN The size of the puff stream (assuming |stream| has been 97 // completely puffed. 98 // |deflates| IN The location of deflates in |stream|. 99 // |puffs| IN The location of puffs into the input puff stream. 100 static UniqueStreamPtr CreateForHuff(UniqueStreamPtr stream, 101 std::shared_ptr<Huffer> huffer, 102 uint64_t puff_size, 103 const std::vector<BitExtent>& deflates, 104 const std::vector<ByteExtent>& puffs); 105 106 bool GetSize(uint64_t* size) const override; 107 108 // Returns the current offset in the imaginary puff stream. 109 bool GetOffset(uint64_t* offset) const override; 110 111 // Sets the current offset in the imaginary puff stream. 112 bool Seek(uint64_t offset) override; 113 114 // Reads from the deflate stream |stream_| and writes the puff stream into 115 // |buffer|. 116 bool Read(void* buffer, size_t length) override; 117 118 // Reads the puff stream from |buffer|, huffs it and writes it into the 119 // deflate stream |stream_|. The current assumption for write is that data is 120 // wrote from beginning to end with no retraction or random change of offset. 121 // This function, writes non-puff data directly to |stream_| and caches the 122 // puff data into |puff_buffer_|. When |puff_buffer_| is full, it huffs it 123 // into |deflate_buffer_| and writes it to |stream_|. 124 bool Write(const void* buffer, size_t length) override; 125 126 bool Close() override; 127 128 protected: 129 // The non-public internal Ctor. 130 PuffinStream(UniqueStreamPtr stream, 131 std::shared_ptr<Puffer> puffer, 132 std::shared_ptr<Huffer> huffer, 133 uint64_t puff_size, 134 const std::vector<BitExtent>& deflates, 135 const std::vector<ByteExtent>& puffs, 136 size_t max_cache_size); 137 138 private: 139 // See |extra_byte_|. 140 bool SetExtraByte(); 141 142 // Returns the cache for the |puff_id|th puff. If it does not find it, either 143 // returns the least accessed cached (if cache is full) or creates a new empty 144 // buffer. It returns false if it cannot find the |puff_id|th puff cache. 145 bool GetPuffCache(int puff_id, 146 uint64_t puff_size, 147 std::shared_ptr<Buffer>* buffer); 148 149 UniqueStreamPtr stream_; 150 151 std::shared_ptr<Puffer> puffer_; 152 std::shared_ptr<Huffer> huffer_; 153 154 // The size of the imaginary puff stream. 155 uint64_t puff_stream_size_; 156 157 std::vector<BitExtent> deflates_; 158 // The current deflate is being processed. 159 std::vector<BitExtent>::iterator cur_deflate_; 160 161 std::vector<ByteExtent> puffs_; 162 // The current puff is being processed. 163 std::vector<ByteExtent>::iterator cur_puff_; 164 165 std::vector<uint64_t> upper_bounds_; 166 167 // The current offset in the imaginary puff stream is |puff_pos_| + 168 // |skip_bytes_| 169 uint64_t puff_pos_; 170 uint64_t skip_bytes_; 171 172 // The current bit offset in |stream_|. 173 uint64_t deflate_bit_pos_; 174 175 // This value caches the first or last byte of a deflate stream. This is 176 // needed when two deflate stream end on the same byte (with greater than zero 177 // bit offset difference) or a deflate starts from middle of the byte. We need 178 // to cache the value in here before we have the rest of the puff buffer to 179 // make the deflate. 180 uint8_t last_byte_; 181 182 // We have to figure out if we need to cache an extra puff byte for the last 183 // byte of the deflate. This is only needed if the last bit of the current 184 // deflate is not in the same byte as the first bit of the next deflate. The 185 // value is either 0 or 1. If 1. 186 size_t extra_byte_; 187 188 // True if the stream is only for puffing. False if for huffing. 189 bool is_for_puff_; 190 191 // True if the |Close()| is called. 192 bool closed_; 193 194 std::unique_ptr<Buffer> deflate_buffer_; 195 std::shared_ptr<Buffer> puff_buffer_; 196 197 // The LRU cache for holding puff data 198 LRUCache lru_cache_; 199 200 DISALLOW_COPY_AND_ASSIGN(PuffinStream); 201 }; 202 203 } // namespace puffin 204 205 #endif // SRC_PUFFIN_STREAM_H_ 206