xref: /aosp_15_r20/external/puffin/src/puffin_stream.h (revision 07fb1d065b7cfb4729786fadd42a612532d2f466)
1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors
2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be
3*07fb1d06SElliott Hughes // found in the LICENSE file.
4*07fb1d06SElliott Hughes 
5*07fb1d06SElliott Hughes #ifndef SRC_PUFFIN_STREAM_H_
6*07fb1d06SElliott Hughes #define SRC_PUFFIN_STREAM_H_
7*07fb1d06SElliott Hughes 
8*07fb1d06SElliott Hughes #include <filesystem>
9*07fb1d06SElliott Hughes #include <list>
10*07fb1d06SElliott Hughes #include <memory>
11*07fb1d06SElliott Hughes #include <set>
12*07fb1d06SElliott Hughes #include <unordered_map>
13*07fb1d06SElliott Hughes #include <utility>
14*07fb1d06SElliott Hughes #include <vector>
15*07fb1d06SElliott Hughes 
16*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/common.h"
17*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/huffer.h"
18*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/puffer.h"
19*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/stream.h"
20*07fb1d06SElliott Hughes 
21*07fb1d06SElliott Hughes namespace puffin {
22*07fb1d06SElliott Hughes 
23*07fb1d06SElliott Hughes class LRUCache {
24*07fb1d06SElliott Hughes  public:
25*07fb1d06SElliott Hughes   using Key = int;
26*07fb1d06SElliott Hughes   using Value = std::shared_ptr<Buffer>;
27*07fb1d06SElliott Hughes   typedef typename std::pair<Key, Value> key_value_pair_t;
28*07fb1d06SElliott Hughes   typedef typename std::list<key_value_pair_t>::iterator iterator;
29*07fb1d06SElliott Hughes 
30*07fb1d06SElliott Hughes   LRUCache(size_t max_size);
31*07fb1d06SElliott Hughes   ~LRUCache();
32*07fb1d06SElliott Hughes   LRUCache(const LRUCache&) = delete;
33*07fb1d06SElliott Hughes 
34*07fb1d06SElliott Hughes   void put(const Key& key, const Value& value);
35*07fb1d06SElliott Hughes 
36*07fb1d06SElliott Hughes   bool EvictLRUItem();
37*07fb1d06SElliott Hughes 
38*07fb1d06SElliott Hughes   // Ensure that cache has sufficient space for a new item of |size| bytes
39*07fb1d06SElliott Hughes   void EnsureSpace(size_t size);
40*07fb1d06SElliott Hughes 
41*07fb1d06SElliott Hughes   const Value get(const Key& key);
42*07fb1d06SElliott Hughes 
exists(const Key & key)43*07fb1d06SElliott Hughes   bool exists(const Key& key) const {
44*07fb1d06SElliott Hughes     return items_map_.find(key) != items_map_.end();
45*07fb1d06SElliott Hughes   }
46*07fb1d06SElliott Hughes 
size()47*07fb1d06SElliott Hughes   size_t size() const { return cache_size_; }
capacity()48*07fb1d06SElliott Hughes   size_t capacity() const { return max_size_; }
49*07fb1d06SElliott Hughes 
50*07fb1d06SElliott Hughes  private:
51*07fb1d06SElliott Hughes   bool WriteToDisk(const Key& key, const Value& value);
52*07fb1d06SElliott Hughes   Value ReadFromDisk(const Key& key);
53*07fb1d06SElliott Hughes   std::list<key_value_pair_t> items_list_;
54*07fb1d06SElliott Hughes   std::unordered_map<Key, iterator> items_map_;
55*07fb1d06SElliott Hughes   size_t cache_size_ = 0;
56*07fb1d06SElliott Hughes   size_t max_size_;
57*07fb1d06SElliott Hughes   std::filesystem::path tmpdir_;
58*07fb1d06SElliott Hughes   std::set<Key> ondisk_items_;
59*07fb1d06SElliott Hughes };
60*07fb1d06SElliott Hughes 
61*07fb1d06SElliott Hughes // A class for puffing a deflate stream and huffing into a deflate stream. The
62*07fb1d06SElliott Hughes // puff stream is "imaginary", which means it doesn't really exists; It is build
63*07fb1d06SElliott Hughes // and used on demand. This class uses a given deflate stream, and puffs the
64*07fb1d06SElliott Hughes // deflate buffers in the stream as needed or vice versa.  An object of this
65*07fb1d06SElliott Hughes // class can be used for reading and writing puff data but should not be used
66*07fb1d06SElliott Hughes // for both reading and writing using the same instance. In theory we can
67*07fb1d06SElliott Hughes // separate this class into two classes, namely |PuffStream| and |HuffStream|,
68*07fb1d06SElliott Hughes // but they are sharing a lot of codes which might be inconvenient and
69*07fb1d06SElliott Hughes // unnecessary to do so. In this implementation, there is no protection against
70*07fb1d06SElliott Hughes // reading and writing at the same time.
71*07fb1d06SElliott Hughes class PuffinStream : public StreamInterface {
72*07fb1d06SElliott Hughes  public:
73*07fb1d06SElliott Hughes   ~PuffinStream() override = default;
74*07fb1d06SElliott Hughes 
75*07fb1d06SElliott Hughes   // Creates a |PuffinStream| for reading puff buffers from a deflate stream.
76*07fb1d06SElliott Hughes   // |stream|    IN  The deflate stream.
77*07fb1d06SElliott Hughes   // |puffer|    IN  The |Puffer| used for puffing the stream.
78*07fb1d06SElliott Hughes   // |puff_size| IN  The size of the puff stream (assuming |stream| has been
79*07fb1d06SElliott Hughes   //                 completely puffed.
80*07fb1d06SElliott Hughes   // |deflates|  IN  The location of deflates in |stream|.
81*07fb1d06SElliott Hughes   // |puffs|     IN  The location of puffs into the final puff stream.
82*07fb1d06SElliott Hughes   // |max_cache_size| IN  The amount of memory to use for caching puff buffers.
83*07fb1d06SElliott Hughes   //                      If the mount is smaller than the maximum puff buffer
84*07fb1d06SElliott Hughes   //                      size in |puffs|, then its value will be set to zero
85*07fb1d06SElliott Hughes   //                      and no puff will be cached.
86*07fb1d06SElliott Hughes   static UniqueStreamPtr CreateForPuff(UniqueStreamPtr stream,
87*07fb1d06SElliott Hughes                                        std::shared_ptr<Puffer> puffer,
88*07fb1d06SElliott Hughes                                        uint64_t puff_size,
89*07fb1d06SElliott Hughes                                        const std::vector<BitExtent>& deflates,
90*07fb1d06SElliott Hughes                                        const std::vector<ByteExtent>& puffs,
91*07fb1d06SElliott Hughes                                        size_t max_cache_size = 0);
92*07fb1d06SElliott Hughes 
93*07fb1d06SElliott Hughes   // Creates a |PuffinStream| for writing puff buffers into a deflate stream.
94*07fb1d06SElliott Hughes   // |stream|    IN  The deflate stream.
95*07fb1d06SElliott Hughes   // |huffer|    IN  The |Huffer| used for huffing into the |stream|.
96*07fb1d06SElliott Hughes   // |puff_size| IN  The size of the puff stream (assuming |stream| has been
97*07fb1d06SElliott Hughes   //                 completely puffed.
98*07fb1d06SElliott Hughes   // |deflates|  IN  The location of deflates in |stream|.
99*07fb1d06SElliott Hughes   // |puffs|     IN  The location of puffs into the input puff stream.
100*07fb1d06SElliott Hughes   static UniqueStreamPtr CreateForHuff(UniqueStreamPtr stream,
101*07fb1d06SElliott Hughes                                        std::shared_ptr<Huffer> huffer,
102*07fb1d06SElliott Hughes                                        uint64_t puff_size,
103*07fb1d06SElliott Hughes                                        const std::vector<BitExtent>& deflates,
104*07fb1d06SElliott Hughes                                        const std::vector<ByteExtent>& puffs);
105*07fb1d06SElliott Hughes 
106*07fb1d06SElliott Hughes   bool GetSize(uint64_t* size) const override;
107*07fb1d06SElliott Hughes 
108*07fb1d06SElliott Hughes   // Returns the current offset in the imaginary puff stream.
109*07fb1d06SElliott Hughes   bool GetOffset(uint64_t* offset) const override;
110*07fb1d06SElliott Hughes 
111*07fb1d06SElliott Hughes   // Sets the current offset in the imaginary puff stream.
112*07fb1d06SElliott Hughes   bool Seek(uint64_t offset) override;
113*07fb1d06SElliott Hughes 
114*07fb1d06SElliott Hughes   // Reads from the deflate stream |stream_| and writes the puff stream into
115*07fb1d06SElliott Hughes   // |buffer|.
116*07fb1d06SElliott Hughes   bool Read(void* buffer, size_t length) override;
117*07fb1d06SElliott Hughes 
118*07fb1d06SElliott Hughes   // Reads the puff stream from |buffer|, huffs it and writes it into the
119*07fb1d06SElliott Hughes   // deflate stream |stream_|. The current assumption for write is that data is
120*07fb1d06SElliott Hughes   // wrote from beginning to end with no retraction or random change of offset.
121*07fb1d06SElliott Hughes   // This function, writes non-puff data directly to |stream_| and caches the
122*07fb1d06SElliott Hughes   // puff data into |puff_buffer_|. When |puff_buffer_| is full, it huffs it
123*07fb1d06SElliott Hughes   // into |deflate_buffer_| and writes it to |stream_|.
124*07fb1d06SElliott Hughes   bool Write(const void* buffer, size_t length) override;
125*07fb1d06SElliott Hughes 
126*07fb1d06SElliott Hughes   bool Close() override;
127*07fb1d06SElliott Hughes 
128*07fb1d06SElliott Hughes  protected:
129*07fb1d06SElliott Hughes   // The non-public internal Ctor.
130*07fb1d06SElliott Hughes   PuffinStream(UniqueStreamPtr stream,
131*07fb1d06SElliott Hughes                std::shared_ptr<Puffer> puffer,
132*07fb1d06SElliott Hughes                std::shared_ptr<Huffer> huffer,
133*07fb1d06SElliott Hughes                uint64_t puff_size,
134*07fb1d06SElliott Hughes                const std::vector<BitExtent>& deflates,
135*07fb1d06SElliott Hughes                const std::vector<ByteExtent>& puffs,
136*07fb1d06SElliott Hughes                size_t max_cache_size);
137*07fb1d06SElliott Hughes 
138*07fb1d06SElliott Hughes  private:
139*07fb1d06SElliott Hughes   // See |extra_byte_|.
140*07fb1d06SElliott Hughes   bool SetExtraByte();
141*07fb1d06SElliott Hughes 
142*07fb1d06SElliott Hughes   // Returns the cache for the |puff_id|th puff. If it does not find it, either
143*07fb1d06SElliott Hughes   // returns the least accessed cached (if cache is full) or creates a new empty
144*07fb1d06SElliott Hughes   // buffer. It returns false if it cannot find the |puff_id|th puff cache.
145*07fb1d06SElliott Hughes   bool GetPuffCache(int puff_id,
146*07fb1d06SElliott Hughes                     uint64_t puff_size,
147*07fb1d06SElliott Hughes                     std::shared_ptr<Buffer>* buffer);
148*07fb1d06SElliott Hughes 
149*07fb1d06SElliott Hughes   UniqueStreamPtr stream_;
150*07fb1d06SElliott Hughes 
151*07fb1d06SElliott Hughes   std::shared_ptr<Puffer> puffer_;
152*07fb1d06SElliott Hughes   std::shared_ptr<Huffer> huffer_;
153*07fb1d06SElliott Hughes 
154*07fb1d06SElliott Hughes   // The size of the imaginary puff stream.
155*07fb1d06SElliott Hughes   uint64_t puff_stream_size_;
156*07fb1d06SElliott Hughes 
157*07fb1d06SElliott Hughes   std::vector<BitExtent> deflates_;
158*07fb1d06SElliott Hughes   // The current deflate is being processed.
159*07fb1d06SElliott Hughes   std::vector<BitExtent>::iterator cur_deflate_;
160*07fb1d06SElliott Hughes 
161*07fb1d06SElliott Hughes   std::vector<ByteExtent> puffs_;
162*07fb1d06SElliott Hughes   // The current puff is being processed.
163*07fb1d06SElliott Hughes   std::vector<ByteExtent>::iterator cur_puff_;
164*07fb1d06SElliott Hughes 
165*07fb1d06SElliott Hughes   std::vector<uint64_t> upper_bounds_;
166*07fb1d06SElliott Hughes 
167*07fb1d06SElliott Hughes   // The current offset in the imaginary puff stream is |puff_pos_| +
168*07fb1d06SElliott Hughes   // |skip_bytes_|
169*07fb1d06SElliott Hughes   uint64_t puff_pos_;
170*07fb1d06SElliott Hughes   uint64_t skip_bytes_;
171*07fb1d06SElliott Hughes 
172*07fb1d06SElliott Hughes   // The current bit offset in |stream_|.
173*07fb1d06SElliott Hughes   uint64_t deflate_bit_pos_;
174*07fb1d06SElliott Hughes 
175*07fb1d06SElliott Hughes   // This value caches the first or last byte of a deflate stream. This is
176*07fb1d06SElliott Hughes   // needed when two deflate stream end on the same byte (with greater than zero
177*07fb1d06SElliott Hughes   // bit offset difference) or a deflate starts from middle of the byte. We need
178*07fb1d06SElliott Hughes   // to cache the value in here before we have the rest of the puff buffer to
179*07fb1d06SElliott Hughes   // make the deflate.
180*07fb1d06SElliott Hughes   uint8_t last_byte_;
181*07fb1d06SElliott Hughes 
182*07fb1d06SElliott Hughes   // We have to figure out if we need to cache an extra puff byte for the last
183*07fb1d06SElliott Hughes   // byte of the deflate. This is only needed if the last bit of the current
184*07fb1d06SElliott Hughes   // deflate is not in the same byte as the first bit of the next deflate. The
185*07fb1d06SElliott Hughes   // value is either 0 or 1. If 1.
186*07fb1d06SElliott Hughes   size_t extra_byte_;
187*07fb1d06SElliott Hughes 
188*07fb1d06SElliott Hughes   // True if the stream is only for puffing. False if for huffing.
189*07fb1d06SElliott Hughes   bool is_for_puff_;
190*07fb1d06SElliott Hughes 
191*07fb1d06SElliott Hughes   // True if the |Close()| is called.
192*07fb1d06SElliott Hughes   bool closed_;
193*07fb1d06SElliott Hughes 
194*07fb1d06SElliott Hughes   std::unique_ptr<Buffer> deflate_buffer_;
195*07fb1d06SElliott Hughes   std::shared_ptr<Buffer> puff_buffer_;
196*07fb1d06SElliott Hughes 
197*07fb1d06SElliott Hughes   // The LRU cache for holding puff data
198*07fb1d06SElliott Hughes   LRUCache lru_cache_;
199*07fb1d06SElliott Hughes 
200*07fb1d06SElliott Hughes   DISALLOW_COPY_AND_ASSIGN(PuffinStream);
201*07fb1d06SElliott Hughes };
202*07fb1d06SElliott Hughes 
203*07fb1d06SElliott Hughes }  // namespace puffin
204*07fb1d06SElliott Hughes 
205*07fb1d06SElliott Hughes #endif  // SRC_PUFFIN_STREAM_H_
206