1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors 2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be 3*07fb1d06SElliott Hughes // found in the LICENSE file. 4*07fb1d06SElliott Hughes 5*07fb1d06SElliott Hughes #ifndef SRC_PUFFIN_STREAM_H_ 6*07fb1d06SElliott Hughes #define SRC_PUFFIN_STREAM_H_ 7*07fb1d06SElliott Hughes 8*07fb1d06SElliott Hughes #include <filesystem> 9*07fb1d06SElliott Hughes #include <list> 10*07fb1d06SElliott Hughes #include <memory> 11*07fb1d06SElliott Hughes #include <set> 12*07fb1d06SElliott Hughes #include <unordered_map> 13*07fb1d06SElliott Hughes #include <utility> 14*07fb1d06SElliott Hughes #include <vector> 15*07fb1d06SElliott Hughes 16*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/common.h" 17*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/huffer.h" 18*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/puffer.h" 19*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/stream.h" 20*07fb1d06SElliott Hughes 21*07fb1d06SElliott Hughes namespace puffin { 22*07fb1d06SElliott Hughes 23*07fb1d06SElliott Hughes class LRUCache { 24*07fb1d06SElliott Hughes public: 25*07fb1d06SElliott Hughes using Key = int; 26*07fb1d06SElliott Hughes using Value = std::shared_ptr<Buffer>; 27*07fb1d06SElliott Hughes typedef typename std::pair<Key, Value> key_value_pair_t; 28*07fb1d06SElliott Hughes typedef typename std::list<key_value_pair_t>::iterator iterator; 29*07fb1d06SElliott Hughes 30*07fb1d06SElliott Hughes LRUCache(size_t max_size); 31*07fb1d06SElliott Hughes ~LRUCache(); 32*07fb1d06SElliott Hughes LRUCache(const LRUCache&) = delete; 33*07fb1d06SElliott Hughes 34*07fb1d06SElliott Hughes void put(const Key& key, const Value& value); 35*07fb1d06SElliott Hughes 36*07fb1d06SElliott Hughes bool EvictLRUItem(); 37*07fb1d06SElliott Hughes 38*07fb1d06SElliott Hughes // Ensure that cache has sufficient space for a new item of |size| bytes 39*07fb1d06SElliott Hughes void EnsureSpace(size_t size); 40*07fb1d06SElliott Hughes 41*07fb1d06SElliott Hughes const Value get(const Key& key); 42*07fb1d06SElliott Hughes exists(const Key & key)43*07fb1d06SElliott Hughes bool exists(const Key& key) const { 44*07fb1d06SElliott Hughes return items_map_.find(key) != items_map_.end(); 45*07fb1d06SElliott Hughes } 46*07fb1d06SElliott Hughes size()47*07fb1d06SElliott Hughes size_t size() const { return cache_size_; } capacity()48*07fb1d06SElliott Hughes size_t capacity() const { return max_size_; } 49*07fb1d06SElliott Hughes 50*07fb1d06SElliott Hughes private: 51*07fb1d06SElliott Hughes bool WriteToDisk(const Key& key, const Value& value); 52*07fb1d06SElliott Hughes Value ReadFromDisk(const Key& key); 53*07fb1d06SElliott Hughes std::list<key_value_pair_t> items_list_; 54*07fb1d06SElliott Hughes std::unordered_map<Key, iterator> items_map_; 55*07fb1d06SElliott Hughes size_t cache_size_ = 0; 56*07fb1d06SElliott Hughes size_t max_size_; 57*07fb1d06SElliott Hughes std::filesystem::path tmpdir_; 58*07fb1d06SElliott Hughes std::set<Key> ondisk_items_; 59*07fb1d06SElliott Hughes }; 60*07fb1d06SElliott Hughes 61*07fb1d06SElliott Hughes // A class for puffing a deflate stream and huffing into a deflate stream. The 62*07fb1d06SElliott Hughes // puff stream is "imaginary", which means it doesn't really exists; It is build 63*07fb1d06SElliott Hughes // and used on demand. This class uses a given deflate stream, and puffs the 64*07fb1d06SElliott Hughes // deflate buffers in the stream as needed or vice versa. An object of this 65*07fb1d06SElliott Hughes // class can be used for reading and writing puff data but should not be used 66*07fb1d06SElliott Hughes // for both reading and writing using the same instance. In theory we can 67*07fb1d06SElliott Hughes // separate this class into two classes, namely |PuffStream| and |HuffStream|, 68*07fb1d06SElliott Hughes // but they are sharing a lot of codes which might be inconvenient and 69*07fb1d06SElliott Hughes // unnecessary to do so. In this implementation, there is no protection against 70*07fb1d06SElliott Hughes // reading and writing at the same time. 71*07fb1d06SElliott Hughes class PuffinStream : public StreamInterface { 72*07fb1d06SElliott Hughes public: 73*07fb1d06SElliott Hughes ~PuffinStream() override = default; 74*07fb1d06SElliott Hughes 75*07fb1d06SElliott Hughes // Creates a |PuffinStream| for reading puff buffers from a deflate stream. 76*07fb1d06SElliott Hughes // |stream| IN The deflate stream. 77*07fb1d06SElliott Hughes // |puffer| IN The |Puffer| used for puffing the stream. 78*07fb1d06SElliott Hughes // |puff_size| IN The size of the puff stream (assuming |stream| has been 79*07fb1d06SElliott Hughes // completely puffed. 80*07fb1d06SElliott Hughes // |deflates| IN The location of deflates in |stream|. 81*07fb1d06SElliott Hughes // |puffs| IN The location of puffs into the final puff stream. 82*07fb1d06SElliott Hughes // |max_cache_size| IN The amount of memory to use for caching puff buffers. 83*07fb1d06SElliott Hughes // If the mount is smaller than the maximum puff buffer 84*07fb1d06SElliott Hughes // size in |puffs|, then its value will be set to zero 85*07fb1d06SElliott Hughes // and no puff will be cached. 86*07fb1d06SElliott Hughes static UniqueStreamPtr CreateForPuff(UniqueStreamPtr stream, 87*07fb1d06SElliott Hughes std::shared_ptr<Puffer> puffer, 88*07fb1d06SElliott Hughes uint64_t puff_size, 89*07fb1d06SElliott Hughes const std::vector<BitExtent>& deflates, 90*07fb1d06SElliott Hughes const std::vector<ByteExtent>& puffs, 91*07fb1d06SElliott Hughes size_t max_cache_size = 0); 92*07fb1d06SElliott Hughes 93*07fb1d06SElliott Hughes // Creates a |PuffinStream| for writing puff buffers into a deflate stream. 94*07fb1d06SElliott Hughes // |stream| IN The deflate stream. 95*07fb1d06SElliott Hughes // |huffer| IN The |Huffer| used for huffing into the |stream|. 96*07fb1d06SElliott Hughes // |puff_size| IN The size of the puff stream (assuming |stream| has been 97*07fb1d06SElliott Hughes // completely puffed. 98*07fb1d06SElliott Hughes // |deflates| IN The location of deflates in |stream|. 99*07fb1d06SElliott Hughes // |puffs| IN The location of puffs into the input puff stream. 100*07fb1d06SElliott Hughes static UniqueStreamPtr CreateForHuff(UniqueStreamPtr stream, 101*07fb1d06SElliott Hughes std::shared_ptr<Huffer> huffer, 102*07fb1d06SElliott Hughes uint64_t puff_size, 103*07fb1d06SElliott Hughes const std::vector<BitExtent>& deflates, 104*07fb1d06SElliott Hughes const std::vector<ByteExtent>& puffs); 105*07fb1d06SElliott Hughes 106*07fb1d06SElliott Hughes bool GetSize(uint64_t* size) const override; 107*07fb1d06SElliott Hughes 108*07fb1d06SElliott Hughes // Returns the current offset in the imaginary puff stream. 109*07fb1d06SElliott Hughes bool GetOffset(uint64_t* offset) const override; 110*07fb1d06SElliott Hughes 111*07fb1d06SElliott Hughes // Sets the current offset in the imaginary puff stream. 112*07fb1d06SElliott Hughes bool Seek(uint64_t offset) override; 113*07fb1d06SElliott Hughes 114*07fb1d06SElliott Hughes // Reads from the deflate stream |stream_| and writes the puff stream into 115*07fb1d06SElliott Hughes // |buffer|. 116*07fb1d06SElliott Hughes bool Read(void* buffer, size_t length) override; 117*07fb1d06SElliott Hughes 118*07fb1d06SElliott Hughes // Reads the puff stream from |buffer|, huffs it and writes it into the 119*07fb1d06SElliott Hughes // deflate stream |stream_|. The current assumption for write is that data is 120*07fb1d06SElliott Hughes // wrote from beginning to end with no retraction or random change of offset. 121*07fb1d06SElliott Hughes // This function, writes non-puff data directly to |stream_| and caches the 122*07fb1d06SElliott Hughes // puff data into |puff_buffer_|. When |puff_buffer_| is full, it huffs it 123*07fb1d06SElliott Hughes // into |deflate_buffer_| and writes it to |stream_|. 124*07fb1d06SElliott Hughes bool Write(const void* buffer, size_t length) override; 125*07fb1d06SElliott Hughes 126*07fb1d06SElliott Hughes bool Close() override; 127*07fb1d06SElliott Hughes 128*07fb1d06SElliott Hughes protected: 129*07fb1d06SElliott Hughes // The non-public internal Ctor. 130*07fb1d06SElliott Hughes PuffinStream(UniqueStreamPtr stream, 131*07fb1d06SElliott Hughes std::shared_ptr<Puffer> puffer, 132*07fb1d06SElliott Hughes std::shared_ptr<Huffer> huffer, 133*07fb1d06SElliott Hughes uint64_t puff_size, 134*07fb1d06SElliott Hughes const std::vector<BitExtent>& deflates, 135*07fb1d06SElliott Hughes const std::vector<ByteExtent>& puffs, 136*07fb1d06SElliott Hughes size_t max_cache_size); 137*07fb1d06SElliott Hughes 138*07fb1d06SElliott Hughes private: 139*07fb1d06SElliott Hughes // See |extra_byte_|. 140*07fb1d06SElliott Hughes bool SetExtraByte(); 141*07fb1d06SElliott Hughes 142*07fb1d06SElliott Hughes // Returns the cache for the |puff_id|th puff. If it does not find it, either 143*07fb1d06SElliott Hughes // returns the least accessed cached (if cache is full) or creates a new empty 144*07fb1d06SElliott Hughes // buffer. It returns false if it cannot find the |puff_id|th puff cache. 145*07fb1d06SElliott Hughes bool GetPuffCache(int puff_id, 146*07fb1d06SElliott Hughes uint64_t puff_size, 147*07fb1d06SElliott Hughes std::shared_ptr<Buffer>* buffer); 148*07fb1d06SElliott Hughes 149*07fb1d06SElliott Hughes UniqueStreamPtr stream_; 150*07fb1d06SElliott Hughes 151*07fb1d06SElliott Hughes std::shared_ptr<Puffer> puffer_; 152*07fb1d06SElliott Hughes std::shared_ptr<Huffer> huffer_; 153*07fb1d06SElliott Hughes 154*07fb1d06SElliott Hughes // The size of the imaginary puff stream. 155*07fb1d06SElliott Hughes uint64_t puff_stream_size_; 156*07fb1d06SElliott Hughes 157*07fb1d06SElliott Hughes std::vector<BitExtent> deflates_; 158*07fb1d06SElliott Hughes // The current deflate is being processed. 159*07fb1d06SElliott Hughes std::vector<BitExtent>::iterator cur_deflate_; 160*07fb1d06SElliott Hughes 161*07fb1d06SElliott Hughes std::vector<ByteExtent> puffs_; 162*07fb1d06SElliott Hughes // The current puff is being processed. 163*07fb1d06SElliott Hughes std::vector<ByteExtent>::iterator cur_puff_; 164*07fb1d06SElliott Hughes 165*07fb1d06SElliott Hughes std::vector<uint64_t> upper_bounds_; 166*07fb1d06SElliott Hughes 167*07fb1d06SElliott Hughes // The current offset in the imaginary puff stream is |puff_pos_| + 168*07fb1d06SElliott Hughes // |skip_bytes_| 169*07fb1d06SElliott Hughes uint64_t puff_pos_; 170*07fb1d06SElliott Hughes uint64_t skip_bytes_; 171*07fb1d06SElliott Hughes 172*07fb1d06SElliott Hughes // The current bit offset in |stream_|. 173*07fb1d06SElliott Hughes uint64_t deflate_bit_pos_; 174*07fb1d06SElliott Hughes 175*07fb1d06SElliott Hughes // This value caches the first or last byte of a deflate stream. This is 176*07fb1d06SElliott Hughes // needed when two deflate stream end on the same byte (with greater than zero 177*07fb1d06SElliott Hughes // bit offset difference) or a deflate starts from middle of the byte. We need 178*07fb1d06SElliott Hughes // to cache the value in here before we have the rest of the puff buffer to 179*07fb1d06SElliott Hughes // make the deflate. 180*07fb1d06SElliott Hughes uint8_t last_byte_; 181*07fb1d06SElliott Hughes 182*07fb1d06SElliott Hughes // We have to figure out if we need to cache an extra puff byte for the last 183*07fb1d06SElliott Hughes // byte of the deflate. This is only needed if the last bit of the current 184*07fb1d06SElliott Hughes // deflate is not in the same byte as the first bit of the next deflate. The 185*07fb1d06SElliott Hughes // value is either 0 or 1. If 1. 186*07fb1d06SElliott Hughes size_t extra_byte_; 187*07fb1d06SElliott Hughes 188*07fb1d06SElliott Hughes // True if the stream is only for puffing. False if for huffing. 189*07fb1d06SElliott Hughes bool is_for_puff_; 190*07fb1d06SElliott Hughes 191*07fb1d06SElliott Hughes // True if the |Close()| is called. 192*07fb1d06SElliott Hughes bool closed_; 193*07fb1d06SElliott Hughes 194*07fb1d06SElliott Hughes std::unique_ptr<Buffer> deflate_buffer_; 195*07fb1d06SElliott Hughes std::shared_ptr<Buffer> puff_buffer_; 196*07fb1d06SElliott Hughes 197*07fb1d06SElliott Hughes // The LRU cache for holding puff data 198*07fb1d06SElliott Hughes LRUCache lru_cache_; 199*07fb1d06SElliott Hughes 200*07fb1d06SElliott Hughes DISALLOW_COPY_AND_ASSIGN(PuffinStream); 201*07fb1d06SElliott Hughes }; 202*07fb1d06SElliott Hughes 203*07fb1d06SElliott Hughes } // namespace puffin 204*07fb1d06SElliott Hughes 205*07fb1d06SElliott Hughes #endif // SRC_PUFFIN_STREAM_H_ 206