xref: /aosp_15_r20/external/puffin/src/puffin_stream.h (revision 07fb1d065b7cfb4729786fadd42a612532d2f466)
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef SRC_PUFFIN_STREAM_H_
6 #define SRC_PUFFIN_STREAM_H_
7 
8 #include <filesystem>
9 #include <list>
10 #include <memory>
11 #include <set>
12 #include <unordered_map>
13 #include <utility>
14 #include <vector>
15 
16 #include "puffin/src/include/puffin/common.h"
17 #include "puffin/src/include/puffin/huffer.h"
18 #include "puffin/src/include/puffin/puffer.h"
19 #include "puffin/src/include/puffin/stream.h"
20 
21 namespace puffin {
22 
23 class LRUCache {
24  public:
25   using Key = int;
26   using Value = std::shared_ptr<Buffer>;
27   typedef typename std::pair<Key, Value> key_value_pair_t;
28   typedef typename std::list<key_value_pair_t>::iterator iterator;
29 
30   LRUCache(size_t max_size);
31   ~LRUCache();
32   LRUCache(const LRUCache&) = delete;
33 
34   void put(const Key& key, const Value& value);
35 
36   bool EvictLRUItem();
37 
38   // Ensure that cache has sufficient space for a new item of |size| bytes
39   void EnsureSpace(size_t size);
40 
41   const Value get(const Key& key);
42 
exists(const Key & key)43   bool exists(const Key& key) const {
44     return items_map_.find(key) != items_map_.end();
45   }
46 
size()47   size_t size() const { return cache_size_; }
capacity()48   size_t capacity() const { return max_size_; }
49 
50  private:
51   bool WriteToDisk(const Key& key, const Value& value);
52   Value ReadFromDisk(const Key& key);
53   std::list<key_value_pair_t> items_list_;
54   std::unordered_map<Key, iterator> items_map_;
55   size_t cache_size_ = 0;
56   size_t max_size_;
57   std::filesystem::path tmpdir_;
58   std::set<Key> ondisk_items_;
59 };
60 
61 // A class for puffing a deflate stream and huffing into a deflate stream. The
62 // puff stream is "imaginary", which means it doesn't really exists; It is build
63 // and used on demand. This class uses a given deflate stream, and puffs the
64 // deflate buffers in the stream as needed or vice versa.  An object of this
65 // class can be used for reading and writing puff data but should not be used
66 // for both reading and writing using the same instance. In theory we can
67 // separate this class into two classes, namely |PuffStream| and |HuffStream|,
68 // but they are sharing a lot of codes which might be inconvenient and
69 // unnecessary to do so. In this implementation, there is no protection against
70 // reading and writing at the same time.
71 class PuffinStream : public StreamInterface {
72  public:
73   ~PuffinStream() override = default;
74 
75   // Creates a |PuffinStream| for reading puff buffers from a deflate stream.
76   // |stream|    IN  The deflate stream.
77   // |puffer|    IN  The |Puffer| used for puffing the stream.
78   // |puff_size| IN  The size of the puff stream (assuming |stream| has been
79   //                 completely puffed.
80   // |deflates|  IN  The location of deflates in |stream|.
81   // |puffs|     IN  The location of puffs into the final puff stream.
82   // |max_cache_size| IN  The amount of memory to use for caching puff buffers.
83   //                      If the mount is smaller than the maximum puff buffer
84   //                      size in |puffs|, then its value will be set to zero
85   //                      and no puff will be cached.
86   static UniqueStreamPtr CreateForPuff(UniqueStreamPtr stream,
87                                        std::shared_ptr<Puffer> puffer,
88                                        uint64_t puff_size,
89                                        const std::vector<BitExtent>& deflates,
90                                        const std::vector<ByteExtent>& puffs,
91                                        size_t max_cache_size = 0);
92 
93   // Creates a |PuffinStream| for writing puff buffers into a deflate stream.
94   // |stream|    IN  The deflate stream.
95   // |huffer|    IN  The |Huffer| used for huffing into the |stream|.
96   // |puff_size| IN  The size of the puff stream (assuming |stream| has been
97   //                 completely puffed.
98   // |deflates|  IN  The location of deflates in |stream|.
99   // |puffs|     IN  The location of puffs into the input puff stream.
100   static UniqueStreamPtr CreateForHuff(UniqueStreamPtr stream,
101                                        std::shared_ptr<Huffer> huffer,
102                                        uint64_t puff_size,
103                                        const std::vector<BitExtent>& deflates,
104                                        const std::vector<ByteExtent>& puffs);
105 
106   bool GetSize(uint64_t* size) const override;
107 
108   // Returns the current offset in the imaginary puff stream.
109   bool GetOffset(uint64_t* offset) const override;
110 
111   // Sets the current offset in the imaginary puff stream.
112   bool Seek(uint64_t offset) override;
113 
114   // Reads from the deflate stream |stream_| and writes the puff stream into
115   // |buffer|.
116   bool Read(void* buffer, size_t length) override;
117 
118   // Reads the puff stream from |buffer|, huffs it and writes it into the
119   // deflate stream |stream_|. The current assumption for write is that data is
120   // wrote from beginning to end with no retraction or random change of offset.
121   // This function, writes non-puff data directly to |stream_| and caches the
122   // puff data into |puff_buffer_|. When |puff_buffer_| is full, it huffs it
123   // into |deflate_buffer_| and writes it to |stream_|.
124   bool Write(const void* buffer, size_t length) override;
125 
126   bool Close() override;
127 
128  protected:
129   // The non-public internal Ctor.
130   PuffinStream(UniqueStreamPtr stream,
131                std::shared_ptr<Puffer> puffer,
132                std::shared_ptr<Huffer> huffer,
133                uint64_t puff_size,
134                const std::vector<BitExtent>& deflates,
135                const std::vector<ByteExtent>& puffs,
136                size_t max_cache_size);
137 
138  private:
139   // See |extra_byte_|.
140   bool SetExtraByte();
141 
142   // Returns the cache for the |puff_id|th puff. If it does not find it, either
143   // returns the least accessed cached (if cache is full) or creates a new empty
144   // buffer. It returns false if it cannot find the |puff_id|th puff cache.
145   bool GetPuffCache(int puff_id,
146                     uint64_t puff_size,
147                     std::shared_ptr<Buffer>* buffer);
148 
149   UniqueStreamPtr stream_;
150 
151   std::shared_ptr<Puffer> puffer_;
152   std::shared_ptr<Huffer> huffer_;
153 
154   // The size of the imaginary puff stream.
155   uint64_t puff_stream_size_;
156 
157   std::vector<BitExtent> deflates_;
158   // The current deflate is being processed.
159   std::vector<BitExtent>::iterator cur_deflate_;
160 
161   std::vector<ByteExtent> puffs_;
162   // The current puff is being processed.
163   std::vector<ByteExtent>::iterator cur_puff_;
164 
165   std::vector<uint64_t> upper_bounds_;
166 
167   // The current offset in the imaginary puff stream is |puff_pos_| +
168   // |skip_bytes_|
169   uint64_t puff_pos_;
170   uint64_t skip_bytes_;
171 
172   // The current bit offset in |stream_|.
173   uint64_t deflate_bit_pos_;
174 
175   // This value caches the first or last byte of a deflate stream. This is
176   // needed when two deflate stream end on the same byte (with greater than zero
177   // bit offset difference) or a deflate starts from middle of the byte. We need
178   // to cache the value in here before we have the rest of the puff buffer to
179   // make the deflate.
180   uint8_t last_byte_;
181 
182   // We have to figure out if we need to cache an extra puff byte for the last
183   // byte of the deflate. This is only needed if the last bit of the current
184   // deflate is not in the same byte as the first bit of the next deflate. The
185   // value is either 0 or 1. If 1.
186   size_t extra_byte_;
187 
188   // True if the stream is only for puffing. False if for huffing.
189   bool is_for_puff_;
190 
191   // True if the |Close()| is called.
192   bool closed_;
193 
194   std::unique_ptr<Buffer> deflate_buffer_;
195   std::shared_ptr<Buffer> puff_buffer_;
196 
197   // The LRU cache for holding puff data
198   LRUCache lru_cache_;
199 
200   DISALLOW_COPY_AND_ASSIGN(PuffinStream);
201 };
202 
203 }  // namespace puffin
204 
205 #endif  // SRC_PUFFIN_STREAM_H_
206