1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors
2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be
3*07fb1d06SElliott Hughes // found in the LICENSE file.
4*07fb1d06SElliott Hughes
5*07fb1d06SElliott Hughes #include "puffin/src/puffin_stream.h"
6*07fb1d06SElliott Hughes #include <fcntl.h>
7*07fb1d06SElliott Hughes #include <sys/stat.h>
8*07fb1d06SElliott Hughes
9*07fb1d06SElliott Hughes #include <algorithm>
10*07fb1d06SElliott Hughes #include <filesystem>
11*07fb1d06SElliott Hughes #include <memory>
12*07fb1d06SElliott Hughes #include <system_error>
13*07fb1d06SElliott Hughes #include <utility>
14*07fb1d06SElliott Hughes #include <vector>
15*07fb1d06SElliott Hughes
16*07fb1d06SElliott Hughes #include "puffin/src/bit_reader.h"
17*07fb1d06SElliott Hughes #include "puffin/src/bit_writer.h"
18*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/common.h"
19*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/huffer.h"
20*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/puffer.h"
21*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/stream.h"
22*07fb1d06SElliott Hughes #include "puffin/src/logging.h"
23*07fb1d06SElliott Hughes #include "puffin/src/puff_reader.h"
24*07fb1d06SElliott Hughes #include "puffin/src/puff_writer.h"
25*07fb1d06SElliott Hughes
26*07fb1d06SElliott Hughes using std::shared_ptr;
27*07fb1d06SElliott Hughes using std::unique_ptr;
28*07fb1d06SElliott Hughes using std::vector;
29*07fb1d06SElliott Hughes
30*07fb1d06SElliott Hughes namespace puffin {
31*07fb1d06SElliott Hughes
32*07fb1d06SElliott Hughes namespace {
33*07fb1d06SElliott Hughes
CheckArgsIntegrity(uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs)34*07fb1d06SElliott Hughes bool CheckArgsIntegrity(uint64_t puff_size,
35*07fb1d06SElliott Hughes const vector<BitExtent>& deflates,
36*07fb1d06SElliott Hughes const vector<ByteExtent>& puffs) {
37*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puffs.size() == deflates.size());
38*07fb1d06SElliott Hughes // Check if the |puff_size| is actually greater than the last byte of the last
39*07fb1d06SElliott Hughes // puff in |puffs|.
40*07fb1d06SElliott Hughes if (!puffs.empty()) {
41*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puff_size >=
42*07fb1d06SElliott Hughes puffs.back().offset + puffs.back().length);
43*07fb1d06SElliott Hughes }
44*07fb1d06SElliott Hughes
45*07fb1d06SElliott Hughes // Check to make sure |puffs| and |deflates| are sorted and non-overlapping.
46*07fb1d06SElliott Hughes auto is_overlap = [](const auto& a, const auto& b) {
47*07fb1d06SElliott Hughes return (a.offset + a.length) > b.offset;
48*07fb1d06SElliott Hughes };
49*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(),
50*07fb1d06SElliott Hughes deflates.end(),
51*07fb1d06SElliott Hughes is_overlap));
52*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(),
53*07fb1d06SElliott Hughes puffs.end(),
54*07fb1d06SElliott Hughes is_overlap));
55*07fb1d06SElliott Hughes return true;
56*07fb1d06SElliott Hughes }
57*07fb1d06SElliott Hughes
58*07fb1d06SElliott Hughes } // namespace
59*07fb1d06SElliott Hughes
CreateForPuff(UniqueStreamPtr stream,shared_ptr<Puffer> puffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs,size_t max_cache_size)60*07fb1d06SElliott Hughes UniqueStreamPtr PuffinStream::CreateForPuff(UniqueStreamPtr stream,
61*07fb1d06SElliott Hughes shared_ptr<Puffer> puffer,
62*07fb1d06SElliott Hughes uint64_t puff_size,
63*07fb1d06SElliott Hughes const vector<BitExtent>& deflates,
64*07fb1d06SElliott Hughes const vector<ByteExtent>& puffs,
65*07fb1d06SElliott Hughes size_t max_cache_size) {
66*07fb1d06SElliott Hughes TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
67*07fb1d06SElliott Hughes nullptr);
68*07fb1d06SElliott Hughes TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
69*07fb1d06SElliott Hughes
70*07fb1d06SElliott Hughes UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer,
71*07fb1d06SElliott Hughes nullptr, puff_size, deflates,
72*07fb1d06SElliott Hughes puffs, max_cache_size));
73*07fb1d06SElliott Hughes TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
74*07fb1d06SElliott Hughes return puffin_stream;
75*07fb1d06SElliott Hughes }
76*07fb1d06SElliott Hughes
CreateForHuff(UniqueStreamPtr stream,shared_ptr<Huffer> huffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs)77*07fb1d06SElliott Hughes UniqueStreamPtr PuffinStream::CreateForHuff(UniqueStreamPtr stream,
78*07fb1d06SElliott Hughes shared_ptr<Huffer> huffer,
79*07fb1d06SElliott Hughes uint64_t puff_size,
80*07fb1d06SElliott Hughes const vector<BitExtent>& deflates,
81*07fb1d06SElliott Hughes const vector<ByteExtent>& puffs) {
82*07fb1d06SElliott Hughes TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
83*07fb1d06SElliott Hughes nullptr);
84*07fb1d06SElliott Hughes TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
85*07fb1d06SElliott Hughes
86*07fb1d06SElliott Hughes UniqueStreamPtr puffin_stream(new PuffinStream(
87*07fb1d06SElliott Hughes std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0));
88*07fb1d06SElliott Hughes TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
89*07fb1d06SElliott Hughes return puffin_stream;
90*07fb1d06SElliott Hughes }
91*07fb1d06SElliott Hughes
PuffinStream(UniqueStreamPtr stream,shared_ptr<Puffer> puffer,shared_ptr<Huffer> huffer,uint64_t puff_size,const vector<BitExtent> & deflates,const vector<ByteExtent> & puffs,size_t max_cache_size)92*07fb1d06SElliott Hughes PuffinStream::PuffinStream(UniqueStreamPtr stream,
93*07fb1d06SElliott Hughes shared_ptr<Puffer> puffer,
94*07fb1d06SElliott Hughes shared_ptr<Huffer> huffer,
95*07fb1d06SElliott Hughes uint64_t puff_size,
96*07fb1d06SElliott Hughes const vector<BitExtent>& deflates,
97*07fb1d06SElliott Hughes const vector<ByteExtent>& puffs,
98*07fb1d06SElliott Hughes size_t max_cache_size)
99*07fb1d06SElliott Hughes : stream_(std::move(stream)),
100*07fb1d06SElliott Hughes puffer_(puffer),
101*07fb1d06SElliott Hughes huffer_(huffer),
102*07fb1d06SElliott Hughes puff_stream_size_(puff_size),
103*07fb1d06SElliott Hughes deflates_(deflates),
104*07fb1d06SElliott Hughes puffs_(puffs),
105*07fb1d06SElliott Hughes puff_pos_(0),
106*07fb1d06SElliott Hughes skip_bytes_(0),
107*07fb1d06SElliott Hughes deflate_bit_pos_(0),
108*07fb1d06SElliott Hughes last_byte_(0),
109*07fb1d06SElliott Hughes extra_byte_(0),
110*07fb1d06SElliott Hughes is_for_puff_(puffer_ ? true : false),
111*07fb1d06SElliott Hughes closed_(false),
112*07fb1d06SElliott Hughes lru_cache_(max_cache_size) {
113*07fb1d06SElliott Hughes // Building upper bounds for faster seek.
114*07fb1d06SElliott Hughes upper_bounds_.reserve(puffs.size());
115*07fb1d06SElliott Hughes for (const auto& puff : puffs) {
116*07fb1d06SElliott Hughes upper_bounds_.emplace_back(puff.offset + puff.length);
117*07fb1d06SElliott Hughes }
118*07fb1d06SElliott Hughes upper_bounds_.emplace_back(puff_stream_size_ + 1);
119*07fb1d06SElliott Hughes
120*07fb1d06SElliott Hughes // We can pass the size of the deflate stream too, but it is not necessary
121*07fb1d06SElliott Hughes // yet. We cannot get the size of stream from itself, because we might be
122*07fb1d06SElliott Hughes // writing into it and its size is not defined yet.
123*07fb1d06SElliott Hughes uint64_t deflate_stream_size = puff_stream_size_;
124*07fb1d06SElliott Hughes if (!puffs.empty()) {
125*07fb1d06SElliott Hughes deflate_stream_size =
126*07fb1d06SElliott Hughes ((deflates.back().offset + deflates.back().length) / 8) +
127*07fb1d06SElliott Hughes puff_stream_size_ - (puffs.back().offset + puffs.back().length);
128*07fb1d06SElliott Hughes }
129*07fb1d06SElliott Hughes
130*07fb1d06SElliott Hughes deflates_.emplace_back(deflate_stream_size * 8, 0);
131*07fb1d06SElliott Hughes puffs_.emplace_back(puff_stream_size_, 0);
132*07fb1d06SElliott Hughes
133*07fb1d06SElliott Hughes // Look for the largest puff and deflate extents and get proper size buffers.
134*07fb1d06SElliott Hughes uint64_t max_puff_length = 0;
135*07fb1d06SElliott Hughes for (const auto& puff : puffs) {
136*07fb1d06SElliott Hughes max_puff_length = std::max(max_puff_length, puff.length);
137*07fb1d06SElliott Hughes }
138*07fb1d06SElliott Hughes puff_buffer_.reset(new Buffer(max_puff_length + 1));
139*07fb1d06SElliott Hughes
140*07fb1d06SElliott Hughes uint64_t max_deflate_length = 0;
141*07fb1d06SElliott Hughes for (const auto& deflate : deflates) {
142*07fb1d06SElliott Hughes max_deflate_length = std::max(max_deflate_length, deflate.length * 8);
143*07fb1d06SElliott Hughes }
144*07fb1d06SElliott Hughes deflate_buffer_.reset(new Buffer(max_deflate_length + 2));
145*07fb1d06SElliott Hughes }
146*07fb1d06SElliott Hughes
GetSize(uint64_t * size) const147*07fb1d06SElliott Hughes bool PuffinStream::GetSize(uint64_t* size) const {
148*07fb1d06SElliott Hughes *size = puff_stream_size_;
149*07fb1d06SElliott Hughes return true;
150*07fb1d06SElliott Hughes }
151*07fb1d06SElliott Hughes
GetOffset(uint64_t * offset) const152*07fb1d06SElliott Hughes bool PuffinStream::GetOffset(uint64_t* offset) const {
153*07fb1d06SElliott Hughes *offset = puff_pos_ + skip_bytes_;
154*07fb1d06SElliott Hughes return true;
155*07fb1d06SElliott Hughes }
156*07fb1d06SElliott Hughes
Seek(uint64_t offset)157*07fb1d06SElliott Hughes bool PuffinStream::Seek(uint64_t offset) {
158*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(!closed_);
159*07fb1d06SElliott Hughes if (!is_for_puff_) {
160*07fb1d06SElliott Hughes // For huffing we should not seek, only seek to zero is accepted.
161*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(offset == 0);
162*07fb1d06SElliott Hughes }
163*07fb1d06SElliott Hughes
164*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(offset <= puff_stream_size_);
165*07fb1d06SElliott Hughes
166*07fb1d06SElliott Hughes // We are searching first available puff which either includes the |offset| or
167*07fb1d06SElliott Hughes // it is the next available puff after |offset|.
168*07fb1d06SElliott Hughes auto next_puff_iter =
169*07fb1d06SElliott Hughes std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset);
170*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end());
171*07fb1d06SElliott Hughes auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter);
172*07fb1d06SElliott Hughes cur_puff_ = std::next(puffs_.begin(), next_puff_idx);
173*07fb1d06SElliott Hughes cur_deflate_ = std::next(deflates_.begin(), next_puff_idx);
174*07fb1d06SElliott Hughes
175*07fb1d06SElliott Hughes if (offset < cur_puff_->offset) {
176*07fb1d06SElliott Hughes // between two puffs.
177*07fb1d06SElliott Hughes puff_pos_ = offset;
178*07fb1d06SElliott Hughes auto back_track_bytes = cur_puff_->offset - puff_pos_;
179*07fb1d06SElliott Hughes deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8;
180*07fb1d06SElliott Hughes if (cur_puff_ != puffs_.begin()) {
181*07fb1d06SElliott Hughes auto prev_deflate = std::prev(cur_deflate_);
182*07fb1d06SElliott Hughes if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) {
183*07fb1d06SElliott Hughes deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length;
184*07fb1d06SElliott Hughes }
185*07fb1d06SElliott Hughes }
186*07fb1d06SElliott Hughes } else {
187*07fb1d06SElliott Hughes // Inside a puff.
188*07fb1d06SElliott Hughes puff_pos_ = cur_puff_->offset;
189*07fb1d06SElliott Hughes deflate_bit_pos_ = cur_deflate_->offset;
190*07fb1d06SElliott Hughes }
191*07fb1d06SElliott Hughes skip_bytes_ = offset - puff_pos_;
192*07fb1d06SElliott Hughes if (!is_for_puff_ && offset == 0) {
193*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(stream_->Seek(0));
194*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(SetExtraByte());
195*07fb1d06SElliott Hughes }
196*07fb1d06SElliott Hughes return true;
197*07fb1d06SElliott Hughes }
198*07fb1d06SElliott Hughes
Close()199*07fb1d06SElliott Hughes bool PuffinStream::Close() {
200*07fb1d06SElliott Hughes closed_ = true;
201*07fb1d06SElliott Hughes return stream_->Close();
202*07fb1d06SElliott Hughes }
203*07fb1d06SElliott Hughes
Read(void * buffer,size_t count)204*07fb1d06SElliott Hughes bool PuffinStream::Read(void* buffer, size_t count) {
205*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(!closed_);
206*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(is_for_puff_);
207*07fb1d06SElliott Hughes if (cur_puff_ == puffs_.end()) {
208*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(count == 0);
209*07fb1d06SElliott Hughes }
210*07fb1d06SElliott Hughes auto bytes = static_cast<uint8_t*>(buffer);
211*07fb1d06SElliott Hughes uint64_t length = count;
212*07fb1d06SElliott Hughes uint64_t bytes_read = 0;
213*07fb1d06SElliott Hughes while (bytes_read < length) {
214*07fb1d06SElliott Hughes if (puff_pos_ < cur_puff_->offset) {
215*07fb1d06SElliott Hughes // Reading between two deflates. We also read bytes that have at least one
216*07fb1d06SElliott Hughes // bit of a deflate bit stream. The byte which has both deflate and raw
217*07fb1d06SElliott Hughes // data will be shifted or masked off the deflate bits and the remaining
218*07fb1d06SElliott Hughes // value will be saved in the puff stream as an byte integer.
219*07fb1d06SElliott Hughes uint64_t start_byte = (deflate_bit_pos_ / 8);
220*07fb1d06SElliott Hughes uint64_t end_byte = (cur_deflate_->offset + 7) / 8;
221*07fb1d06SElliott Hughes auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte);
222*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bytes_to_read >= 1);
223*07fb1d06SElliott Hughes
224*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
225*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read));
226*07fb1d06SElliott Hughes
227*07fb1d06SElliott Hughes // If true, we read the first byte of the curret deflate. So we have to
228*07fb1d06SElliott Hughes // mask out the deflate bits (which are most significant bits.)
229*07fb1d06SElliott Hughes if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) {
230*07fb1d06SElliott Hughes bytes[bytes_read + bytes_to_read - 1] &=
231*07fb1d06SElliott Hughes (1 << (cur_deflate_->offset & 7)) - 1;
232*07fb1d06SElliott Hughes }
233*07fb1d06SElliott Hughes
234*07fb1d06SElliott Hughes // If true, we read the last byte of the previous deflate and we have to
235*07fb1d06SElliott Hughes // shift it out. The least significat bits belongs to the deflate
236*07fb1d06SElliott Hughes // stream. The order of these last two conditions are important because a
237*07fb1d06SElliott Hughes // byte can contain a finishing deflate and a starting deflate with some
238*07fb1d06SElliott Hughes // bits between them so we have to modify correctly. Keep in mind that in
239*07fb1d06SElliott Hughes // this situation both are modifying the same byte.
240*07fb1d06SElliott Hughes if (start_byte * 8 < deflate_bit_pos_) {
241*07fb1d06SElliott Hughes bytes[bytes_read] >>= deflate_bit_pos_ & 7;
242*07fb1d06SElliott Hughes }
243*07fb1d06SElliott Hughes
244*07fb1d06SElliott Hughes // Pass |deflate_bit_pos_| for all the read bytes.
245*07fb1d06SElliott Hughes deflate_bit_pos_ -= deflate_bit_pos_ & 7;
246*07fb1d06SElliott Hughes deflate_bit_pos_ += bytes_to_read * 8;
247*07fb1d06SElliott Hughes if (deflate_bit_pos_ > cur_deflate_->offset) {
248*07fb1d06SElliott Hughes // In case it reads into the first byte of the current deflate.
249*07fb1d06SElliott Hughes deflate_bit_pos_ = cur_deflate_->offset;
250*07fb1d06SElliott Hughes }
251*07fb1d06SElliott Hughes
252*07fb1d06SElliott Hughes bytes_read += bytes_to_read;
253*07fb1d06SElliott Hughes puff_pos_ += bytes_to_read;
254*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset);
255*07fb1d06SElliott Hughes } else {
256*07fb1d06SElliott Hughes // Reading the deflate itself. We read all bytes including the first and
257*07fb1d06SElliott Hughes // last byte (which may partially include a deflate bit). Here we keep the
258*07fb1d06SElliott Hughes // |puff_pos_| point to the first byte of the puffed stream and
259*07fb1d06SElliott Hughes // |skip_bytes_| shows how many bytes in the puff we have copied till now.
260*07fb1d06SElliott Hughes auto start_byte = (cur_deflate_->offset / 8);
261*07fb1d06SElliott Hughes auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
262*07fb1d06SElliott Hughes auto bytes_to_read = end_byte - start_byte;
263*07fb1d06SElliott Hughes // Puff directly to buffer if it has space.
264*07fb1d06SElliott Hughes const bool puff_directly_into_buffer =
265*07fb1d06SElliott Hughes lru_cache_.capacity() == 0 && (skip_bytes_ == 0) &&
266*07fb1d06SElliott Hughes (length - bytes_read >= cur_puff_->length);
267*07fb1d06SElliott Hughes
268*07fb1d06SElliott Hughes auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
269*07fb1d06SElliott Hughes if (lru_cache_.capacity() == 0 ||
270*07fb1d06SElliott Hughes !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
271*07fb1d06SElliott Hughes // Did not find the puff buffer in cache. We have to build it.
272*07fb1d06SElliott Hughes deflate_buffer_->resize(bytes_to_read);
273*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
274*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
275*07fb1d06SElliott Hughes stream_->Read(deflate_buffer_->data(), bytes_to_read));
276*07fb1d06SElliott Hughes BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read);
277*07fb1d06SElliott Hughes
278*07fb1d06SElliott Hughes BufferPuffWriter puff_writer(puff_directly_into_buffer
279*07fb1d06SElliott Hughes ? bytes + bytes_read
280*07fb1d06SElliott Hughes : puff_buffer_->data(),
281*07fb1d06SElliott Hughes cur_puff_->length);
282*07fb1d06SElliott Hughes
283*07fb1d06SElliott Hughes // Drop the first unused bits.
284*07fb1d06SElliott Hughes size_t extra_bits_len = cur_deflate_->offset & 7;
285*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len));
286*07fb1d06SElliott Hughes bit_reader.DropBits(extra_bits_len);
287*07fb1d06SElliott Hughes
288*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
289*07fb1d06SElliott Hughes puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr));
290*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset());
291*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size());
292*07fb1d06SElliott Hughes } else {
293*07fb1d06SElliott Hughes // Just seek to proper location.
294*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read));
295*07fb1d06SElliott Hughes }
296*07fb1d06SElliott Hughes // Copy from puff buffer to output if needed.
297*07fb1d06SElliott Hughes auto bytes_to_copy =
298*07fb1d06SElliott Hughes std::min(length - bytes_read, cur_puff_->length - skip_bytes_);
299*07fb1d06SElliott Hughes if (!puff_directly_into_buffer) {
300*07fb1d06SElliott Hughes memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_,
301*07fb1d06SElliott Hughes bytes_to_copy);
302*07fb1d06SElliott Hughes }
303*07fb1d06SElliott Hughes
304*07fb1d06SElliott Hughes skip_bytes_ += bytes_to_copy;
305*07fb1d06SElliott Hughes bytes_read += bytes_to_copy;
306*07fb1d06SElliott Hughes
307*07fb1d06SElliott Hughes // Move to next puff.
308*07fb1d06SElliott Hughes if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) {
309*07fb1d06SElliott Hughes puff_pos_ += skip_bytes_;
310*07fb1d06SElliott Hughes skip_bytes_ = 0;
311*07fb1d06SElliott Hughes deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
312*07fb1d06SElliott Hughes cur_puff_++;
313*07fb1d06SElliott Hughes cur_deflate_++;
314*07fb1d06SElliott Hughes if (cur_puff_ == puffs_.end()) {
315*07fb1d06SElliott Hughes break;
316*07fb1d06SElliott Hughes }
317*07fb1d06SElliott Hughes }
318*07fb1d06SElliott Hughes }
319*07fb1d06SElliott Hughes }
320*07fb1d06SElliott Hughes
321*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bytes_read == length);
322*07fb1d06SElliott Hughes return true;
323*07fb1d06SElliott Hughes }
324*07fb1d06SElliott Hughes
Write(const void * buffer,size_t count)325*07fb1d06SElliott Hughes bool PuffinStream::Write(const void* buffer, size_t count) {
326*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(!closed_);
327*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(!is_for_puff_);
328*07fb1d06SElliott Hughes auto bytes = static_cast<const uint8_t*>(buffer);
329*07fb1d06SElliott Hughes uint64_t length = count;
330*07fb1d06SElliott Hughes uint64_t bytes_wrote = 0;
331*07fb1d06SElliott Hughes while (bytes_wrote < length) {
332*07fb1d06SElliott Hughes if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) {
333*07fb1d06SElliott Hughes // Between two puffs or before the first puff. We know that we are
334*07fb1d06SElliott Hughes // starting from the byte boundary because we have already processed the
335*07fb1d06SElliott Hughes // non-deflate bits of the last byte of the last deflate. Here we don't
336*07fb1d06SElliott Hughes // process any byte that has deflate bit.
337*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0);
338*07fb1d06SElliott Hughes auto copy_len =
339*07fb1d06SElliott Hughes std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8),
340*07fb1d06SElliott Hughes length - bytes_wrote);
341*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len));
342*07fb1d06SElliott Hughes bytes_wrote += copy_len;
343*07fb1d06SElliott Hughes puff_pos_ += copy_len;
344*07fb1d06SElliott Hughes deflate_bit_pos_ += copy_len * 8;
345*07fb1d06SElliott Hughes } else {
346*07fb1d06SElliott Hughes // We are in a puff. We have to buffer incoming bytes until we reach the
347*07fb1d06SElliott Hughes // size of the current puff so we can huff :). If the last bit of the
348*07fb1d06SElliott Hughes // current deflate does not end in a byte boundary, then we have to read
349*07fb1d06SElliott Hughes // one more byte to fill up the last byte of the deflate stream before
350*07fb1d06SElliott Hughes // doing anything else.
351*07fb1d06SElliott Hughes
352*07fb1d06SElliott Hughes // |deflate_bit_pos_| now should be in the same byte as
353*07fb1d06SElliott Hughes // |cur_deflate->offset|.
354*07fb1d06SElliott Hughes if (deflate_bit_pos_ < cur_deflate_->offset) {
355*07fb1d06SElliott Hughes last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7);
356*07fb1d06SElliott Hughes skip_bytes_ = 0;
357*07fb1d06SElliott Hughes deflate_bit_pos_ = cur_deflate_->offset;
358*07fb1d06SElliott Hughes puff_pos_++;
359*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset);
360*07fb1d06SElliott Hughes }
361*07fb1d06SElliott Hughes
362*07fb1d06SElliott Hughes auto copy_len = std::min(length - bytes_wrote,
363*07fb1d06SElliott Hughes cur_puff_->length + extra_byte_ - skip_bytes_);
364*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len);
365*07fb1d06SElliott Hughes memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len);
366*07fb1d06SElliott Hughes skip_bytes_ += copy_len;
367*07fb1d06SElliott Hughes bytes_wrote += copy_len;
368*07fb1d06SElliott Hughes
369*07fb1d06SElliott Hughes if (skip_bytes_ == cur_puff_->length + extra_byte_) {
370*07fb1d06SElliott Hughes // |puff_buffer_| is full, now huff into the |deflate_buffer_|.
371*07fb1d06SElliott Hughes auto start_byte = cur_deflate_->offset / 8;
372*07fb1d06SElliott Hughes auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
373*07fb1d06SElliott Hughes auto bytes_to_write = end_byte - start_byte;
374*07fb1d06SElliott Hughes
375*07fb1d06SElliott Hughes deflate_buffer_->resize(bytes_to_write);
376*07fb1d06SElliott Hughes BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write);
377*07fb1d06SElliott Hughes BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length);
378*07fb1d06SElliott Hughes
379*07fb1d06SElliott Hughes // Write last byte if it has any.
380*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
381*07fb1d06SElliott Hughes bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_));
382*07fb1d06SElliott Hughes last_byte_ = 0;
383*07fb1d06SElliott Hughes
384*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(huffer_->HuffDeflate(&puff_reader, &bit_writer));
385*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write);
386*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0);
387*07fb1d06SElliott Hughes
388*07fb1d06SElliott Hughes deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
389*07fb1d06SElliott Hughes if (extra_byte_ == 1) {
390*07fb1d06SElliott Hughes deflate_buffer_->data()[bytes_to_write - 1] |=
391*07fb1d06SElliott Hughes puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7);
392*07fb1d06SElliott Hughes deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull;
393*07fb1d06SElliott Hughes } else if ((deflate_bit_pos_ & 7) != 0) {
394*07fb1d06SElliott Hughes // This happens if current and next deflate finish and end on the same
395*07fb1d06SElliott Hughes // byte, then we cannot write into output until we have huffed the
396*07fb1d06SElliott Hughes // next puff buffer, so untill then we cache it into |last_byte_| and
397*07fb1d06SElliott Hughes // we won't write it out.
398*07fb1d06SElliott Hughes last_byte_ = deflate_buffer_->data()[bytes_to_write - 1];
399*07fb1d06SElliott Hughes bytes_to_write--;
400*07fb1d06SElliott Hughes }
401*07fb1d06SElliott Hughes
402*07fb1d06SElliott Hughes // Write |deflate_buffer_| into output.
403*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
404*07fb1d06SElliott Hughes stream_->Write(deflate_buffer_->data(), bytes_to_write));
405*07fb1d06SElliott Hughes
406*07fb1d06SElliott Hughes // Move to the next deflate/puff.
407*07fb1d06SElliott Hughes puff_pos_ += skip_bytes_;
408*07fb1d06SElliott Hughes skip_bytes_ = 0;
409*07fb1d06SElliott Hughes cur_puff_++;
410*07fb1d06SElliott Hughes cur_deflate_++;
411*07fb1d06SElliott Hughes if (cur_puff_ == puffs_.end()) {
412*07fb1d06SElliott Hughes break;
413*07fb1d06SElliott Hughes }
414*07fb1d06SElliott Hughes // Find if need an extra byte to cache at the end.
415*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(SetExtraByte());
416*07fb1d06SElliott Hughes }
417*07fb1d06SElliott Hughes }
418*07fb1d06SElliott Hughes }
419*07fb1d06SElliott Hughes
420*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bytes_wrote == length);
421*07fb1d06SElliott Hughes return true;
422*07fb1d06SElliott Hughes }
423*07fb1d06SElliott Hughes
SetExtraByte()424*07fb1d06SElliott Hughes bool PuffinStream::SetExtraByte() {
425*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end());
426*07fb1d06SElliott Hughes if ((cur_deflate_ + 1) == deflates_.end()) {
427*07fb1d06SElliott Hughes extra_byte_ = 0;
428*07fb1d06SElliott Hughes return true;
429*07fb1d06SElliott Hughes }
430*07fb1d06SElliott Hughes uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length;
431*07fb1d06SElliott Hughes if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) {
432*07fb1d06SElliott Hughes extra_byte_ = 1;
433*07fb1d06SElliott Hughes } else {
434*07fb1d06SElliott Hughes extra_byte_ = 0;
435*07fb1d06SElliott Hughes }
436*07fb1d06SElliott Hughes return true;
437*07fb1d06SElliott Hughes }
438*07fb1d06SElliott Hughes
GetPuffCache(int puff_id,uint64_t puff_size,shared_ptr<Buffer> * buffer)439*07fb1d06SElliott Hughes bool PuffinStream::GetPuffCache(int puff_id,
440*07fb1d06SElliott Hughes uint64_t puff_size,
441*07fb1d06SElliott Hughes shared_ptr<Buffer>* buffer) {
442*07fb1d06SElliott Hughes // Search for it.
443*07fb1d06SElliott Hughes shared_ptr<Buffer> cache = lru_cache_.get(puff_id);
444*07fb1d06SElliott Hughes const bool found = cache != nullptr;
445*07fb1d06SElliott Hughes
446*07fb1d06SElliott Hughes // If not found, either create one or get one from the list.
447*07fb1d06SElliott Hughes if (!found) {
448*07fb1d06SElliott Hughes // If we have not populated the cache yet, create one.
449*07fb1d06SElliott Hughes lru_cache_.EnsureSpace(puff_size);
450*07fb1d06SElliott Hughes cache = std::make_shared<Buffer>(puff_size);
451*07fb1d06SElliott Hughes
452*07fb1d06SElliott Hughes lru_cache_.put(puff_id, cache);
453*07fb1d06SElliott Hughes }
454*07fb1d06SElliott Hughes
455*07fb1d06SElliott Hughes *buffer = std::move(cache);
456*07fb1d06SElliott Hughes return found;
457*07fb1d06SElliott Hughes }
458*07fb1d06SElliott Hughes
get(const Key & key)459*07fb1d06SElliott Hughes const LRUCache::Value LRUCache::get(const Key& key) {
460*07fb1d06SElliott Hughes auto it = items_map_.find(key);
461*07fb1d06SElliott Hughes if (it == items_map_.end()) {
462*07fb1d06SElliott Hughes if (ondisk_items_.count(key) > 0) {
463*07fb1d06SElliott Hughes auto&& data = ReadFromDisk(key);
464*07fb1d06SElliott Hughes put(key, data);
465*07fb1d06SElliott Hughes return data;
466*07fb1d06SElliott Hughes }
467*07fb1d06SElliott Hughes return nullptr;
468*07fb1d06SElliott Hughes }
469*07fb1d06SElliott Hughes items_list_.splice(items_list_.begin(), items_list_, it->second);
470*07fb1d06SElliott Hughes return it->second->second;
471*07fb1d06SElliott Hughes }
472*07fb1d06SElliott Hughes
ReadFromDisk(const LRUCache::Key & key)473*07fb1d06SElliott Hughes LRUCache::Value LRUCache::ReadFromDisk(const LRUCache::Key& key) {
474*07fb1d06SElliott Hughes const auto path = tmpdir_ / std::to_string(key);
475*07fb1d06SElliott Hughes int fd = open(path.c_str(), O_RDONLY | O_CLOEXEC);
476*07fb1d06SElliott Hughes if (fd < 0) {
477*07fb1d06SElliott Hughes PLOG(ERROR) << "Failed to open " << path;
478*07fb1d06SElliott Hughes return {};
479*07fb1d06SElliott Hughes }
480*07fb1d06SElliott Hughes auto fd_delete = [](int* fd) { close(*fd); };
481*07fb1d06SElliott Hughes std::unique_ptr<int, decltype(fd_delete)> guard(&fd, fd_delete);
482*07fb1d06SElliott Hughes struct stat st {};
483*07fb1d06SElliott Hughes const int ret = stat(path.c_str(), &st);
484*07fb1d06SElliott Hughes if (ret < 0) {
485*07fb1d06SElliott Hughes PLOG(ERROR) << "Failed to stat " << path << " ret: " << ret;
486*07fb1d06SElliott Hughes return {};
487*07fb1d06SElliott Hughes }
488*07fb1d06SElliott Hughes LRUCache::Value data = std::make_shared<std::vector<uint8_t>>(st.st_size);
489*07fb1d06SElliott Hughes const auto bytes_read =
490*07fb1d06SElliott Hughes TEMP_FAILURE_RETRY(pread(fd, data->data(), data->size(), 0));
491*07fb1d06SElliott Hughes if (static_cast<size_t>(bytes_read) != data->size()) {
492*07fb1d06SElliott Hughes PLOG(ERROR) << "Failed to read " << data->size() << " bytes data from "
493*07fb1d06SElliott Hughes << path;
494*07fb1d06SElliott Hughes return {};
495*07fb1d06SElliott Hughes }
496*07fb1d06SElliott Hughes return data;
497*07fb1d06SElliott Hughes }
498*07fb1d06SElliott Hughes
~LRUCache()499*07fb1d06SElliott Hughes LRUCache::~LRUCache() {
500*07fb1d06SElliott Hughes std::error_code ec;
501*07fb1d06SElliott Hughes std::filesystem::remove_all(tmpdir_, ec);
502*07fb1d06SElliott Hughes if (ec) {
503*07fb1d06SElliott Hughes LOG(ERROR) << "Failed to rm -rf " << tmpdir_ << " " << ec.message();
504*07fb1d06SElliott Hughes }
505*07fb1d06SElliott Hughes }
506*07fb1d06SElliott Hughes
WriteToDisk(const LRUCache::Key & key,const LRUCache::Value & value)507*07fb1d06SElliott Hughes bool LRUCache::WriteToDisk(const LRUCache::Key& key,
508*07fb1d06SElliott Hughes const LRUCache::Value& value) {
509*07fb1d06SElliott Hughes if (tmpdir_.empty()) {
510*07fb1d06SElliott Hughes return false;
511*07fb1d06SElliott Hughes }
512*07fb1d06SElliott Hughes const auto path = tmpdir_ / std::to_string(key);
513*07fb1d06SElliott Hughes int fd = open(path.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
514*07fb1d06SElliott Hughes if (fd < 0) {
515*07fb1d06SElliott Hughes PLOG(ERROR) << "Failed to open " << path;
516*07fb1d06SElliott Hughes return false;
517*07fb1d06SElliott Hughes }
518*07fb1d06SElliott Hughes auto fd_delete = [](int* fd) { close(*fd); };
519*07fb1d06SElliott Hughes std::unique_ptr<int, decltype(fd_delete)> guard(&fd, fd_delete);
520*07fb1d06SElliott Hughes const auto written =
521*07fb1d06SElliott Hughes TEMP_FAILURE_RETRY(write(fd, value->data(), value->size()));
522*07fb1d06SElliott Hughes if (static_cast<size_t>(written) != value->size()) {
523*07fb1d06SElliott Hughes PLOG(ERROR) << "Failed to write " << value->size() << " bytes data to "
524*07fb1d06SElliott Hughes << path;
525*07fb1d06SElliott Hughes return false;
526*07fb1d06SElliott Hughes }
527*07fb1d06SElliott Hughes close(fd);
528*07fb1d06SElliott Hughes guard.release();
529*07fb1d06SElliott Hughes ondisk_items_.insert(key);
530*07fb1d06SElliott Hughes return true;
531*07fb1d06SElliott Hughes }
532*07fb1d06SElliott Hughes
put(const LRUCache::Key & key,const LRUCache::Value & value)533*07fb1d06SElliott Hughes void LRUCache::put(const LRUCache::Key& key, const LRUCache::Value& value) {
534*07fb1d06SElliott Hughes auto it = items_map_.find(key);
535*07fb1d06SElliott Hughes if (it != items_map_.end()) {
536*07fb1d06SElliott Hughes cache_size_ -= it->second->second->capacity();
537*07fb1d06SElliott Hughes items_list_.erase(it->second);
538*07fb1d06SElliott Hughes items_map_.erase(it);
539*07fb1d06SElliott Hughes }
540*07fb1d06SElliott Hughes EnsureSpace(value->capacity());
541*07fb1d06SElliott Hughes cache_size_ += value->capacity();
542*07fb1d06SElliott Hughes items_list_.push_front(key_value_pair_t(key, value));
543*07fb1d06SElliott Hughes items_map_[key] = items_list_.begin();
544*07fb1d06SElliott Hughes }
545*07fb1d06SElliott Hughes
EvictLRUItem()546*07fb1d06SElliott Hughes bool LRUCache::EvictLRUItem() {
547*07fb1d06SElliott Hughes if (items_list_.empty()) {
548*07fb1d06SElliott Hughes return false;
549*07fb1d06SElliott Hughes }
550*07fb1d06SElliott Hughes const auto last = items_list_.back();
551*07fb1d06SElliott Hughes cache_size_ -= last.second->capacity();
552*07fb1d06SElliott Hughes // Only write puffs large enough to disk, as disk writes have latency.
553*07fb1d06SElliott Hughes if (last.second->size() > 16 * 1024) {
554*07fb1d06SElliott Hughes WriteToDisk(last.first, last.second);
555*07fb1d06SElliott Hughes }
556*07fb1d06SElliott Hughes items_map_.erase(last.first);
557*07fb1d06SElliott Hughes items_list_.pop_back();
558*07fb1d06SElliott Hughes return true;
559*07fb1d06SElliott Hughes }
560*07fb1d06SElliott Hughes
EnsureSpace(size_t size)561*07fb1d06SElliott Hughes void LRUCache::EnsureSpace(size_t size) {
562*07fb1d06SElliott Hughes while (cache_size_ + size > max_size_) {
563*07fb1d06SElliott Hughes if (!EvictLRUItem()) {
564*07fb1d06SElliott Hughes return;
565*07fb1d06SElliott Hughes }
566*07fb1d06SElliott Hughes }
567*07fb1d06SElliott Hughes }
568*07fb1d06SElliott Hughes
GetTempDir()569*07fb1d06SElliott Hughes const char* GetTempDir() {
570*07fb1d06SElliott Hughes const char* tmpdir = getenv("TMPDIR");
571*07fb1d06SElliott Hughes if (tmpdir != nullptr) {
572*07fb1d06SElliott Hughes return tmpdir;
573*07fb1d06SElliott Hughes }
574*07fb1d06SElliott Hughes return "/tmp";
575*07fb1d06SElliott Hughes }
576*07fb1d06SElliott Hughes
LRUCache(size_t max_size)577*07fb1d06SElliott Hughes LRUCache::LRUCache(size_t max_size) : max_size_(max_size) {
578*07fb1d06SElliott Hughes std::error_code ec;
579*07fb1d06SElliott Hughes auto buffer = GetTempDir() + std::string("/lru.XXXXXX");
580*07fb1d06SElliott Hughes if (ec) {
581*07fb1d06SElliott Hughes LOG(ERROR) << "Failed to get temp directory for LRU cache " << ec.message()
582*07fb1d06SElliott Hughes << " " << ec.value();
583*07fb1d06SElliott Hughes return;
584*07fb1d06SElliott Hughes }
585*07fb1d06SElliott Hughes const char* dirname = mkdtemp(buffer.data());
586*07fb1d06SElliott Hughes if (dirname == nullptr) {
587*07fb1d06SElliott Hughes LOG(ERROR) << "Failed to mkdtemp " << buffer;
588*07fb1d06SElliott Hughes return;
589*07fb1d06SElliott Hughes }
590*07fb1d06SElliott Hughes
591*07fb1d06SElliott Hughes tmpdir_ = dirname;
592*07fb1d06SElliott Hughes }
593*07fb1d06SElliott Hughes
594*07fb1d06SElliott Hughes } // namespace puffin
595