1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors
2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be
3*07fb1d06SElliott Hughes // found in the LICENSE file.
4*07fb1d06SElliott Hughes
5*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/puffer.h"
6*07fb1d06SElliott Hughes
7*07fb1d06SElliott Hughes #include <memory>
8*07fb1d06SElliott Hughes #include <string>
9*07fb1d06SElliott Hughes #include <vector>
10*07fb1d06SElliott Hughes
11*07fb1d06SElliott Hughes #include "puffin/src/bit_reader.h"
12*07fb1d06SElliott Hughes #include "puffin/src/huffman_table.h"
13*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/common.h"
14*07fb1d06SElliott Hughes #include "puffin/src/logging.h"
15*07fb1d06SElliott Hughes #include "puffin/src/puff_data.h"
16*07fb1d06SElliott Hughes #include "puffin/src/puff_writer.h"
17*07fb1d06SElliott Hughes
18*07fb1d06SElliott Hughes using std::string;
19*07fb1d06SElliott Hughes using std::vector;
20*07fb1d06SElliott Hughes
21*07fb1d06SElliott Hughes namespace puffin {
22*07fb1d06SElliott Hughes
Puffer(bool exclude_bad_distance_caches)23*07fb1d06SElliott Hughes Puffer::Puffer(bool exclude_bad_distance_caches)
24*07fb1d06SElliott Hughes : dyn_ht_(new HuffmanTable()),
25*07fb1d06SElliott Hughes fix_ht_(new HuffmanTable()),
26*07fb1d06SElliott Hughes exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
27*07fb1d06SElliott Hughes
Puffer()28*07fb1d06SElliott Hughes Puffer::Puffer() : Puffer(false) {}
29*07fb1d06SElliott Hughes
~Puffer()30*07fb1d06SElliott Hughes Puffer::~Puffer() {}
31*07fb1d06SElliott Hughes
PuffDeflate(BitReaderInterface * br,PuffWriterInterface * pw,vector<BitExtent> * deflates) const32*07fb1d06SElliott Hughes bool Puffer::PuffDeflate(BitReaderInterface* br,
33*07fb1d06SElliott Hughes PuffWriterInterface* pw,
34*07fb1d06SElliott Hughes vector<BitExtent>* deflates) const {
35*07fb1d06SElliott Hughes PuffData pd;
36*07fb1d06SElliott Hughes HuffmanTable* cur_ht;
37*07fb1d06SElliott Hughes bool end_loop = false;
38*07fb1d06SElliott Hughes // No bits left to read, return. We try to cache at least eight bits because
39*07fb1d06SElliott Hughes // the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
40*07fb1d06SElliott Hughes // bits header + 5 bits just one len/dist symbol.
41*07fb1d06SElliott Hughes while (!end_loop && br->CacheBits(8)) {
42*07fb1d06SElliott Hughes auto start_bit_offset = br->OffsetInBits();
43*07fb1d06SElliott Hughes
44*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(3));
45*07fb1d06SElliott Hughes uint8_t final_bit = br->ReadBits(1); // BFINAL
46*07fb1d06SElliott Hughes br->DropBits(1);
47*07fb1d06SElliott Hughes uint8_t type = br->ReadBits(2); // BTYPE
48*07fb1d06SElliott Hughes br->DropBits(2);
49*07fb1d06SElliott Hughes DVLOG(2) << "Read block type: "
50*07fb1d06SElliott Hughes << BlockTypeToString(static_cast<BlockType>(type));
51*07fb1d06SElliott Hughes
52*07fb1d06SElliott Hughes // If it is the final block and we are just looking for deflate locations,
53*07fb1d06SElliott Hughes // we consider this the end of the search.
54*07fb1d06SElliott Hughes if (deflates != nullptr && final_bit) {
55*07fb1d06SElliott Hughes end_loop = true;
56*07fb1d06SElliott Hughes }
57*07fb1d06SElliott Hughes
58*07fb1d06SElliott Hughes // Header structure
59*07fb1d06SElliott Hughes // +-+-+-+-+-+-+-+-+
60*07fb1d06SElliott Hughes // |F| TP| SKIP |
61*07fb1d06SElliott Hughes // +-+-+-+-+-+-+-+-+
62*07fb1d06SElliott Hughes // F -> final_bit
63*07fb1d06SElliott Hughes // TP -> type
64*07fb1d06SElliott Hughes // SKIP -> skipped_bits (only in kUncompressed type)
65*07fb1d06SElliott Hughes auto block_header = (final_bit << 7) | (type << 5);
66*07fb1d06SElliott Hughes switch (static_cast<BlockType>(type)) {
67*07fb1d06SElliott Hughes case BlockType::kUncompressed: {
68*07fb1d06SElliott Hughes auto skipped_bits = br->ReadBoundaryBits();
69*07fb1d06SElliott Hughes br->SkipBoundaryBits();
70*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(32));
71*07fb1d06SElliott Hughes auto len = br->ReadBits(16); // LEN
72*07fb1d06SElliott Hughes br->DropBits(16);
73*07fb1d06SElliott Hughes auto nlen = br->ReadBits(16); // NLEN
74*07fb1d06SElliott Hughes br->DropBits(16);
75*07fb1d06SElliott Hughes
76*07fb1d06SElliott Hughes if ((len ^ nlen) != 0xFFFF) {
77*07fb1d06SElliott Hughes LOG(ERROR) << "Length of uncompressed data is invalid;"
78*07fb1d06SElliott Hughes << " LEN(" << len << ") NLEN(" << nlen << ")";
79*07fb1d06SElliott Hughes return false;
80*07fb1d06SElliott Hughes }
81*07fb1d06SElliott Hughes
82*07fb1d06SElliott Hughes // Put skipped bits into header.
83*07fb1d06SElliott Hughes block_header |= skipped_bits;
84*07fb1d06SElliott Hughes
85*07fb1d06SElliott Hughes // Insert block header.
86*07fb1d06SElliott Hughes pd.type = PuffData::Type::kBlockMetadata;
87*07fb1d06SElliott Hughes pd.block_metadata[0] = block_header;
88*07fb1d06SElliott Hughes pd.length = 1;
89*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
90*07fb1d06SElliott Hughes
91*07fb1d06SElliott Hughes // Insert all the raw literals.
92*07fb1d06SElliott Hughes pd.type = PuffData::Type::kLiterals;
93*07fb1d06SElliott Hughes pd.length = len;
94*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
95*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
96*07fb1d06SElliott Hughes
97*07fb1d06SElliott Hughes pd.type = PuffData::Type::kEndOfBlock;
98*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
99*07fb1d06SElliott Hughes
100*07fb1d06SElliott Hughes // There is no need to insert the location of uncompressed deflates
101*07fb1d06SElliott Hughes // because we do not want the uncompressed blocks when trying to find
102*07fb1d06SElliott Hughes // the bit-addressed location of deflates. They better be ignored.
103*07fb1d06SElliott Hughes
104*07fb1d06SElliott Hughes // continue the loop. Do not read any literal/length/distance.
105*07fb1d06SElliott Hughes continue;
106*07fb1d06SElliott Hughes }
107*07fb1d06SElliott Hughes
108*07fb1d06SElliott Hughes case BlockType::kFixed:
109*07fb1d06SElliott Hughes fix_ht_->BuildFixedHuffmanTable();
110*07fb1d06SElliott Hughes cur_ht = fix_ht_.get();
111*07fb1d06SElliott Hughes pd.type = PuffData::Type::kBlockMetadata;
112*07fb1d06SElliott Hughes pd.block_metadata[0] = block_header;
113*07fb1d06SElliott Hughes pd.length = 1;
114*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
115*07fb1d06SElliott Hughes break;
116*07fb1d06SElliott Hughes
117*07fb1d06SElliott Hughes case BlockType::kDynamic:
118*07fb1d06SElliott Hughes pd.type = PuffData::Type::kBlockMetadata;
119*07fb1d06SElliott Hughes pd.block_metadata[0] = block_header;
120*07fb1d06SElliott Hughes pd.length = sizeof(pd.block_metadata) - 1;
121*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
122*07fb1d06SElliott Hughes br, &pd.block_metadata[1], &pd.length));
123*07fb1d06SElliott Hughes pd.length += 1; // For the header.
124*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
125*07fb1d06SElliott Hughes cur_ht = dyn_ht_.get();
126*07fb1d06SElliott Hughes break;
127*07fb1d06SElliott Hughes
128*07fb1d06SElliott Hughes default:
129*07fb1d06SElliott Hughes LOG(ERROR) << "Invalid block compression type: "
130*07fb1d06SElliott Hughes << static_cast<int>(type);
131*07fb1d06SElliott Hughes return false;
132*07fb1d06SElliott Hughes }
133*07fb1d06SElliott Hughes
134*07fb1d06SElliott Hughes // If true and the list of output |deflates| is non-null, the current
135*07fb1d06SElliott Hughes // deflate location will be added to that list.
136*07fb1d06SElliott Hughes bool include_deflate = true;
137*07fb1d06SElliott Hughes
138*07fb1d06SElliott Hughes while (true) { // Breaks when the end of block is reached.
139*07fb1d06SElliott Hughes auto max_bits = cur_ht->LitLenMaxBits();
140*07fb1d06SElliott Hughes if (!br->CacheBits(max_bits)) {
141*07fb1d06SElliott Hughes // It could be the end of buffer and the bit length of the end_of_block
142*07fb1d06SElliott Hughes // symbol has less than maximum bit length of current Huffman table. So
143*07fb1d06SElliott Hughes // only asking for the size of end of block symbol (256).
144*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
145*07fb1d06SElliott Hughes }
146*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
147*07fb1d06SElliott Hughes auto bits = br->ReadBits(max_bits);
148*07fb1d06SElliott Hughes uint16_t lit_len_alphabet;
149*07fb1d06SElliott Hughes size_t nbits;
150*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
151*07fb1d06SElliott Hughes cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
152*07fb1d06SElliott Hughes br->DropBits(nbits);
153*07fb1d06SElliott Hughes if (lit_len_alphabet < 256) {
154*07fb1d06SElliott Hughes pd.type = PuffData::Type::kLiteral;
155*07fb1d06SElliott Hughes pd.byte = lit_len_alphabet;
156*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
157*07fb1d06SElliott Hughes
158*07fb1d06SElliott Hughes } else if (256 == lit_len_alphabet) {
159*07fb1d06SElliott Hughes pd.type = PuffData::Type::kEndOfBlock;
160*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
161*07fb1d06SElliott Hughes if (deflates != nullptr && include_deflate) {
162*07fb1d06SElliott Hughes deflates->emplace_back(start_bit_offset,
163*07fb1d06SElliott Hughes br->OffsetInBits() - start_bit_offset);
164*07fb1d06SElliott Hughes }
165*07fb1d06SElliott Hughes break; // Breaks the loop.
166*07fb1d06SElliott Hughes } else {
167*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
168*07fb1d06SElliott Hughes // Reading length.
169*07fb1d06SElliott Hughes auto len_code_start = lit_len_alphabet - 257;
170*07fb1d06SElliott Hughes auto extra_bits_len = kLengthExtraBits[len_code_start];
171*07fb1d06SElliott Hughes uint16_t extra_bits_value = 0;
172*07fb1d06SElliott Hughes if (extra_bits_len) {
173*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
174*07fb1d06SElliott Hughes extra_bits_value = br->ReadBits(extra_bits_len);
175*07fb1d06SElliott Hughes br->DropBits(extra_bits_len);
176*07fb1d06SElliott Hughes }
177*07fb1d06SElliott Hughes auto length = kLengthBases[len_code_start] + extra_bits_value;
178*07fb1d06SElliott Hughes
179*07fb1d06SElliott Hughes auto bits_to_cache = cur_ht->DistanceMaxBits();
180*07fb1d06SElliott Hughes if (!br->CacheBits(bits_to_cache)) {
181*07fb1d06SElliott Hughes // This is a corner case that is present in the older versions of the
182*07fb1d06SElliott Hughes // puffin. So we need to catch it and correctly discard this kind of
183*07fb1d06SElliott Hughes // deflate when we encounter it. See crbug.com/915559 for more info.
184*07fb1d06SElliott Hughes bits_to_cache = br->BitsRemaining();
185*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
186*07fb1d06SElliott Hughes if (exclude_bad_distance_caches_) {
187*07fb1d06SElliott Hughes include_deflate = false;
188*07fb1d06SElliott Hughes }
189*07fb1d06SElliott Hughes LOG(WARNING) << "A rare condition that older puffin clients fail to"
190*07fb1d06SElliott Hughes << " recognize happened. Nothing to worry about."
191*07fb1d06SElliott Hughes << " See crbug.com/915559";
192*07fb1d06SElliott Hughes }
193*07fb1d06SElliott Hughes auto bits = br->ReadBits(bits_to_cache);
194*07fb1d06SElliott Hughes uint16_t distance_alphabet;
195*07fb1d06SElliott Hughes size_t nbits;
196*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
197*07fb1d06SElliott Hughes cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
198*07fb1d06SElliott Hughes br->DropBits(nbits);
199*07fb1d06SElliott Hughes
200*07fb1d06SElliott Hughes // Reading distance.
201*07fb1d06SElliott Hughes extra_bits_len = kDistanceExtraBits[distance_alphabet];
202*07fb1d06SElliott Hughes extra_bits_value = 0;
203*07fb1d06SElliott Hughes if (extra_bits_len) {
204*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
205*07fb1d06SElliott Hughes extra_bits_value = br->ReadBits(extra_bits_len);
206*07fb1d06SElliott Hughes br->DropBits(extra_bits_len);
207*07fb1d06SElliott Hughes }
208*07fb1d06SElliott Hughes
209*07fb1d06SElliott Hughes pd.type = PuffData::Type::kLenDist;
210*07fb1d06SElliott Hughes pd.length = length;
211*07fb1d06SElliott Hughes pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
212*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Insert(pd));
213*07fb1d06SElliott Hughes }
214*07fb1d06SElliott Hughes }
215*07fb1d06SElliott Hughes }
216*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pw->Flush());
217*07fb1d06SElliott Hughes return true;
218*07fb1d06SElliott Hughes }
219*07fb1d06SElliott Hughes
220*07fb1d06SElliott Hughes } // namespace puffin
221