1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "puffin/src/include/puffin/puffer.h"
6
7 #include <memory>
8 #include <string>
9 #include <vector>
10
11 #include "puffin/src/bit_reader.h"
12 #include "puffin/src/huffman_table.h"
13 #include "puffin/src/include/puffin/common.h"
14 #include "puffin/src/logging.h"
15 #include "puffin/src/puff_data.h"
16 #include "puffin/src/puff_writer.h"
17
18 using std::string;
19 using std::vector;
20
21 namespace puffin {
22
Puffer(bool exclude_bad_distance_caches)23 Puffer::Puffer(bool exclude_bad_distance_caches)
24 : dyn_ht_(new HuffmanTable()),
25 fix_ht_(new HuffmanTable()),
26 exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
27
Puffer()28 Puffer::Puffer() : Puffer(false) {}
29
~Puffer()30 Puffer::~Puffer() {}
31
PuffDeflate(BitReaderInterface * br,PuffWriterInterface * pw,vector<BitExtent> * deflates) const32 bool Puffer::PuffDeflate(BitReaderInterface* br,
33 PuffWriterInterface* pw,
34 vector<BitExtent>* deflates) const {
35 PuffData pd;
36 HuffmanTable* cur_ht;
37 bool end_loop = false;
38 // No bits left to read, return. We try to cache at least eight bits because
39 // the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
40 // bits header + 5 bits just one len/dist symbol.
41 while (!end_loop && br->CacheBits(8)) {
42 auto start_bit_offset = br->OffsetInBits();
43
44 TEST_AND_RETURN_FALSE(br->CacheBits(3));
45 uint8_t final_bit = br->ReadBits(1); // BFINAL
46 br->DropBits(1);
47 uint8_t type = br->ReadBits(2); // BTYPE
48 br->DropBits(2);
49 DVLOG(2) << "Read block type: "
50 << BlockTypeToString(static_cast<BlockType>(type));
51
52 // If it is the final block and we are just looking for deflate locations,
53 // we consider this the end of the search.
54 if (deflates != nullptr && final_bit) {
55 end_loop = true;
56 }
57
58 // Header structure
59 // +-+-+-+-+-+-+-+-+
60 // |F| TP| SKIP |
61 // +-+-+-+-+-+-+-+-+
62 // F -> final_bit
63 // TP -> type
64 // SKIP -> skipped_bits (only in kUncompressed type)
65 auto block_header = (final_bit << 7) | (type << 5);
66 switch (static_cast<BlockType>(type)) {
67 case BlockType::kUncompressed: {
68 auto skipped_bits = br->ReadBoundaryBits();
69 br->SkipBoundaryBits();
70 TEST_AND_RETURN_FALSE(br->CacheBits(32));
71 auto len = br->ReadBits(16); // LEN
72 br->DropBits(16);
73 auto nlen = br->ReadBits(16); // NLEN
74 br->DropBits(16);
75
76 if ((len ^ nlen) != 0xFFFF) {
77 LOG(ERROR) << "Length of uncompressed data is invalid;"
78 << " LEN(" << len << ") NLEN(" << nlen << ")";
79 return false;
80 }
81
82 // Put skipped bits into header.
83 block_header |= skipped_bits;
84
85 // Insert block header.
86 pd.type = PuffData::Type::kBlockMetadata;
87 pd.block_metadata[0] = block_header;
88 pd.length = 1;
89 TEST_AND_RETURN_FALSE(pw->Insert(pd));
90
91 // Insert all the raw literals.
92 pd.type = PuffData::Type::kLiterals;
93 pd.length = len;
94 TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
95 TEST_AND_RETURN_FALSE(pw->Insert(pd));
96
97 pd.type = PuffData::Type::kEndOfBlock;
98 TEST_AND_RETURN_FALSE(pw->Insert(pd));
99
100 // There is no need to insert the location of uncompressed deflates
101 // because we do not want the uncompressed blocks when trying to find
102 // the bit-addressed location of deflates. They better be ignored.
103
104 // continue the loop. Do not read any literal/length/distance.
105 continue;
106 }
107
108 case BlockType::kFixed:
109 fix_ht_->BuildFixedHuffmanTable();
110 cur_ht = fix_ht_.get();
111 pd.type = PuffData::Type::kBlockMetadata;
112 pd.block_metadata[0] = block_header;
113 pd.length = 1;
114 TEST_AND_RETURN_FALSE(pw->Insert(pd));
115 break;
116
117 case BlockType::kDynamic:
118 pd.type = PuffData::Type::kBlockMetadata;
119 pd.block_metadata[0] = block_header;
120 pd.length = sizeof(pd.block_metadata) - 1;
121 TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
122 br, &pd.block_metadata[1], &pd.length));
123 pd.length += 1; // For the header.
124 TEST_AND_RETURN_FALSE(pw->Insert(pd));
125 cur_ht = dyn_ht_.get();
126 break;
127
128 default:
129 LOG(ERROR) << "Invalid block compression type: "
130 << static_cast<int>(type);
131 return false;
132 }
133
134 // If true and the list of output |deflates| is non-null, the current
135 // deflate location will be added to that list.
136 bool include_deflate = true;
137
138 while (true) { // Breaks when the end of block is reached.
139 auto max_bits = cur_ht->LitLenMaxBits();
140 if (!br->CacheBits(max_bits)) {
141 // It could be the end of buffer and the bit length of the end_of_block
142 // symbol has less than maximum bit length of current Huffman table. So
143 // only asking for the size of end of block symbol (256).
144 TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
145 }
146 TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
147 auto bits = br->ReadBits(max_bits);
148 uint16_t lit_len_alphabet;
149 size_t nbits;
150 TEST_AND_RETURN_FALSE(
151 cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
152 br->DropBits(nbits);
153 if (lit_len_alphabet < 256) {
154 pd.type = PuffData::Type::kLiteral;
155 pd.byte = lit_len_alphabet;
156 TEST_AND_RETURN_FALSE(pw->Insert(pd));
157
158 } else if (256 == lit_len_alphabet) {
159 pd.type = PuffData::Type::kEndOfBlock;
160 TEST_AND_RETURN_FALSE(pw->Insert(pd));
161 if (deflates != nullptr && include_deflate) {
162 deflates->emplace_back(start_bit_offset,
163 br->OffsetInBits() - start_bit_offset);
164 }
165 break; // Breaks the loop.
166 } else {
167 TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
168 // Reading length.
169 auto len_code_start = lit_len_alphabet - 257;
170 auto extra_bits_len = kLengthExtraBits[len_code_start];
171 uint16_t extra_bits_value = 0;
172 if (extra_bits_len) {
173 TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
174 extra_bits_value = br->ReadBits(extra_bits_len);
175 br->DropBits(extra_bits_len);
176 }
177 auto length = kLengthBases[len_code_start] + extra_bits_value;
178
179 auto bits_to_cache = cur_ht->DistanceMaxBits();
180 if (!br->CacheBits(bits_to_cache)) {
181 // This is a corner case that is present in the older versions of the
182 // puffin. So we need to catch it and correctly discard this kind of
183 // deflate when we encounter it. See crbug.com/915559 for more info.
184 bits_to_cache = br->BitsRemaining();
185 TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
186 if (exclude_bad_distance_caches_) {
187 include_deflate = false;
188 }
189 LOG(WARNING) << "A rare condition that older puffin clients fail to"
190 << " recognize happened. Nothing to worry about."
191 << " See crbug.com/915559";
192 }
193 auto bits = br->ReadBits(bits_to_cache);
194 uint16_t distance_alphabet;
195 size_t nbits;
196 TEST_AND_RETURN_FALSE(
197 cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
198 br->DropBits(nbits);
199
200 // Reading distance.
201 extra_bits_len = kDistanceExtraBits[distance_alphabet];
202 extra_bits_value = 0;
203 if (extra_bits_len) {
204 TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
205 extra_bits_value = br->ReadBits(extra_bits_len);
206 br->DropBits(extra_bits_len);
207 }
208
209 pd.type = PuffData::Type::kLenDist;
210 pd.length = length;
211 pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
212 TEST_AND_RETURN_FALSE(pw->Insert(pd));
213 }
214 }
215 }
216 TEST_AND_RETURN_FALSE(pw->Flush());
217 return true;
218 }
219
220 } // namespace puffin
221