1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors
2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be
3*07fb1d06SElliott Hughes // found in the LICENSE file.
4*07fb1d06SElliott Hughes
5*07fb1d06SElliott Hughes #include "puffin/src/huffman_table.h"
6*07fb1d06SElliott Hughes
7*07fb1d06SElliott Hughes #include <algorithm>
8*07fb1d06SElliott Hughes #include <vector>
9*07fb1d06SElliott Hughes
10*07fb1d06SElliott Hughes #include "puffin/src/logging.h"
11*07fb1d06SElliott Hughes
12*07fb1d06SElliott Hughes using std::string;
13*07fb1d06SElliott Hughes using std::vector;
14*07fb1d06SElliott Hughes
15*07fb1d06SElliott Hughes namespace puffin {
16*07fb1d06SElliott Hughes
17*07fb1d06SElliott Hughes // Permutations of input Huffman code lengths (used only to read code lengths
18*07fb1d06SElliott Hughes // necessary for reading Huffman table.)
19*07fb1d06SElliott Hughes const uint8_t kPermutations[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
20*07fb1d06SElliott Hughes 11, 4, 12, 3, 13, 2, 14, 1, 15};
21*07fb1d06SElliott Hughes
22*07fb1d06SElliott Hughes // The bases of each alphabet which is added to the integer value of extra
23*07fb1d06SElliott Hughes // bits that comes after the Huffman code in the input to create the given
24*07fb1d06SElliott Hughes // length value. The last element is a guard.
25*07fb1d06SElliott Hughes const uint16_t kLengthBases[30] = {
26*07fb1d06SElliott Hughes 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
27*07fb1d06SElliott Hughes 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
28*07fb1d06SElliott Hughes
29*07fb1d06SElliott Hughes // Number of extra bits that comes after the associating Huffman code.
30*07fb1d06SElliott Hughes const uint8_t kLengthExtraBits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
31*07fb1d06SElliott Hughes 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
32*07fb1d06SElliott Hughes 4, 4, 4, 4, 5, 5, 5, 5, 0};
33*07fb1d06SElliott Hughes
34*07fb1d06SElliott Hughes // Same as |kLengthBases| but for the distances instead of lengths. The last
35*07fb1d06SElliott Hughes // element is a guard.
36*07fb1d06SElliott Hughes const uint16_t kDistanceBases[31] = {
37*07fb1d06SElliott Hughes 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
38*07fb1d06SElliott Hughes 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
39*07fb1d06SElliott Hughes 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
40*07fb1d06SElliott Hughes
41*07fb1d06SElliott Hughes // Same as |kLengthExtraBits| but for distances instead of lengths.
42*07fb1d06SElliott Hughes const uint8_t kDistanceExtraBits[30] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
43*07fb1d06SElliott Hughes 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
44*07fb1d06SElliott Hughes 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
45*07fb1d06SElliott Hughes
46*07fb1d06SElliott Hughes // 288 is the maximum number of needed huffman codes for an alphabet. Fixed
47*07fb1d06SElliott Hughes // huffman table needs 288 and dynamic huffman table needs maximum 286.
48*07fb1d06SElliott Hughes // 286 = 256 (coding a byte) +
49*07fb1d06SElliott Hughes // 1 (coding the end of block symbole) +
50*07fb1d06SElliott Hughes // 29 (coding the lengths)
HuffmanTable()51*07fb1d06SElliott Hughes HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
52*07fb1d06SElliott Hughes
InitHuffmanCodes(const Buffer & lens,size_t * max_bits)53*07fb1d06SElliott Hughes bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
54*07fb1d06SElliott Hughes // Temporary buffers used in |InitHuffmanCodes|.
55*07fb1d06SElliott Hughes uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
56*07fb1d06SElliott Hughes uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
57*07fb1d06SElliott Hughes
58*07fb1d06SElliott Hughes // 1. Count the number of codes for each length;
59*07fb1d06SElliott Hughes for (auto len : lens) {
60*07fb1d06SElliott Hughes len_count_[len]++;
61*07fb1d06SElliott Hughes }
62*07fb1d06SElliott Hughes
63*07fb1d06SElliott Hughes for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
64*07fb1d06SElliott Hughes if (len_count_[*max_bits] != 0) {
65*07fb1d06SElliott Hughes break;
66*07fb1d06SElliott Hughes }
67*07fb1d06SElliott Hughes }
68*07fb1d06SElliott Hughes
69*07fb1d06SElliott Hughes // Check for oversubscribed code lengths. (A code with length 'L' cannot have
70*07fb1d06SElliott Hughes // more than 2^L items.
71*07fb1d06SElliott Hughes for (size_t idx = 1; idx <= *max_bits; idx++) {
72*07fb1d06SElliott Hughes if (len_count_[idx] > (1 << idx)) {
73*07fb1d06SElliott Hughes LOG(ERROR) << "Oversubscribed code lengths error!";
74*07fb1d06SElliott Hughes return false;
75*07fb1d06SElliott Hughes }
76*07fb1d06SElliott Hughes }
77*07fb1d06SElliott Hughes
78*07fb1d06SElliott Hughes // 2. Compute the coding of the first element for each length.
79*07fb1d06SElliott Hughes uint16_t code = 0;
80*07fb1d06SElliott Hughes len_count_[0] = 0;
81*07fb1d06SElliott Hughes for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
82*07fb1d06SElliott Hughes code = (code + len_count_[bits - 1]) << 1;
83*07fb1d06SElliott Hughes next_code_[bits] = code;
84*07fb1d06SElliott Hughes }
85*07fb1d06SElliott Hughes
86*07fb1d06SElliott Hughes codeindexpairs_.clear();
87*07fb1d06SElliott Hughes // 3. Calculate all the code values.
88*07fb1d06SElliott Hughes for (size_t idx = 0; idx < lens.size(); idx++) {
89*07fb1d06SElliott Hughes auto len = lens[idx];
90*07fb1d06SElliott Hughes if (len == 0) {
91*07fb1d06SElliott Hughes continue;
92*07fb1d06SElliott Hughes }
93*07fb1d06SElliott Hughes
94*07fb1d06SElliott Hughes // Reverse the code
95*07fb1d06SElliott Hughes CodeIndexPair cip;
96*07fb1d06SElliott Hughes cip.code = 0;
97*07fb1d06SElliott Hughes auto tmp_code = next_code_[len];
98*07fb1d06SElliott Hughes for (size_t r = 0; r < len; r++) {
99*07fb1d06SElliott Hughes cip.code <<= 1;
100*07fb1d06SElliott Hughes cip.code |= tmp_code & 1U;
101*07fb1d06SElliott Hughes tmp_code >>= 1;
102*07fb1d06SElliott Hughes }
103*07fb1d06SElliott Hughes cip.index = idx;
104*07fb1d06SElliott Hughes codeindexpairs_.push_back(cip);
105*07fb1d06SElliott Hughes next_code_[len]++;
106*07fb1d06SElliott Hughes }
107*07fb1d06SElliott Hughes return true;
108*07fb1d06SElliott Hughes }
109*07fb1d06SElliott Hughes
BuildHuffmanCodes(const Buffer & lens,vector<uint16_t> * hcodes,size_t * max_bits)110*07fb1d06SElliott Hughes bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
111*07fb1d06SElliott Hughes vector<uint16_t>* hcodes,
112*07fb1d06SElliott Hughes size_t* max_bits) {
113*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
114*07fb1d06SElliott Hughes // Sort descending based on the bit-length of the code.
115*07fb1d06SElliott Hughes std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
116*07fb1d06SElliott Hughes [&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
117*07fb1d06SElliott Hughes return lens[a.index] > lens[b.index];
118*07fb1d06SElliott Hughes });
119*07fb1d06SElliott Hughes
120*07fb1d06SElliott Hughes // Only zero out the part of hcodes which is valuable.
121*07fb1d06SElliott Hughes memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
122*07fb1d06SElliott Hughes for (const auto& cip : codeindexpairs_) {
123*07fb1d06SElliott Hughes // The MSB bit of the code in hcodes is set if it is a valid code and its
124*07fb1d06SElliott Hughes // code exists in the input Huffman table.
125*07fb1d06SElliott Hughes (*hcodes)[cip.code] = cip.index | 0x8000;
126*07fb1d06SElliott Hughes auto fill_bits = *max_bits - lens[cip.index];
127*07fb1d06SElliott Hughes for (auto idx = 1; idx < (1 << fill_bits); idx++) {
128*07fb1d06SElliott Hughes auto location = (idx << lens[cip.index]) | cip.code;
129*07fb1d06SElliott Hughes if (!((*hcodes)[location] & 0x8000)) {
130*07fb1d06SElliott Hughes (*hcodes)[location] = cip.index | 0x8000;
131*07fb1d06SElliott Hughes }
132*07fb1d06SElliott Hughes }
133*07fb1d06SElliott Hughes }
134*07fb1d06SElliott Hughes return true;
135*07fb1d06SElliott Hughes }
136*07fb1d06SElliott Hughes
BuildHuffmanReverseCodes(const Buffer & lens,vector<uint16_t> * rcodes,size_t * max_bits)137*07fb1d06SElliott Hughes bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
138*07fb1d06SElliott Hughes vector<uint16_t>* rcodes,
139*07fb1d06SElliott Hughes size_t* max_bits) {
140*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
141*07fb1d06SElliott Hughes // Sort ascending based on the index.
142*07fb1d06SElliott Hughes std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
143*07fb1d06SElliott Hughes [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
144*07fb1d06SElliott Hughes return a.index < b.index;
145*07fb1d06SElliott Hughes });
146*07fb1d06SElliott Hughes
147*07fb1d06SElliott Hughes size_t index = 0;
148*07fb1d06SElliott Hughes for (size_t idx = 0; idx < rcodes->size(); idx++) {
149*07fb1d06SElliott Hughes if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
150*07fb1d06SElliott Hughes (*rcodes)[idx] = codeindexpairs_[index].code;
151*07fb1d06SElliott Hughes index++;
152*07fb1d06SElliott Hughes } else {
153*07fb1d06SElliott Hughes (*rcodes)[idx] = 0;
154*07fb1d06SElliott Hughes }
155*07fb1d06SElliott Hughes }
156*07fb1d06SElliott Hughes return true;
157*07fb1d06SElliott Hughes }
158*07fb1d06SElliott Hughes
BuildFixedHuffmanTable()159*07fb1d06SElliott Hughes bool HuffmanTable::BuildFixedHuffmanTable() {
160*07fb1d06SElliott Hughes if (!initialized_) {
161*07fb1d06SElliott Hughes // For all the vectors used in this class, we set the size in the
162*07fb1d06SElliott Hughes // constructor and we do not change the size later. This is for optimization
163*07fb1d06SElliott Hughes // purposes. The total size of data in this class is approximately
164*07fb1d06SElliott Hughes // 2KB. Because it is a constructor return values cannot be checked.
165*07fb1d06SElliott Hughes lit_len_lens_.resize(288);
166*07fb1d06SElliott Hughes lit_len_rcodes_.resize(288);
167*07fb1d06SElliott Hughes lit_len_hcodes_.resize(1 << 9);
168*07fb1d06SElliott Hughes
169*07fb1d06SElliott Hughes distance_lens_.resize(30);
170*07fb1d06SElliott Hughes distance_rcodes_.resize(30);
171*07fb1d06SElliott Hughes distance_hcodes_.resize(1 << 5);
172*07fb1d06SElliott Hughes
173*07fb1d06SElliott Hughes size_t i = 0;
174*07fb1d06SElliott Hughes while (i < 144) {
175*07fb1d06SElliott Hughes lit_len_lens_[i++] = 8;
176*07fb1d06SElliott Hughes }
177*07fb1d06SElliott Hughes while (i < 256) {
178*07fb1d06SElliott Hughes lit_len_lens_[i++] = 9;
179*07fb1d06SElliott Hughes }
180*07fb1d06SElliott Hughes while (i < 280) {
181*07fb1d06SElliott Hughes lit_len_lens_[i++] = 7;
182*07fb1d06SElliott Hughes }
183*07fb1d06SElliott Hughes while (i < 288) {
184*07fb1d06SElliott Hughes lit_len_lens_[i++] = 8;
185*07fb1d06SElliott Hughes }
186*07fb1d06SElliott Hughes
187*07fb1d06SElliott Hughes i = 0;
188*07fb1d06SElliott Hughes while (i < 30) {
189*07fb1d06SElliott Hughes distance_lens_[i++] = 5;
190*07fb1d06SElliott Hughes }
191*07fb1d06SElliott Hughes
192*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
193*07fb1d06SElliott Hughes BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
194*07fb1d06SElliott Hughes
195*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
196*07fb1d06SElliott Hughes &distance_max_bits_));
197*07fb1d06SElliott Hughes
198*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
199*07fb1d06SElliott Hughes lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
200*07fb1d06SElliott Hughes
201*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
202*07fb1d06SElliott Hughes distance_lens_, &distance_rcodes_, &distance_max_bits_));
203*07fb1d06SElliott Hughes
204*07fb1d06SElliott Hughes initialized_ = true;
205*07fb1d06SElliott Hughes }
206*07fb1d06SElliott Hughes return true;
207*07fb1d06SElliott Hughes }
208*07fb1d06SElliott Hughes
BuildDynamicHuffmanTable(BitReaderInterface * br,uint8_t * buffer,size_t * length)209*07fb1d06SElliott Hughes bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
210*07fb1d06SElliott Hughes uint8_t* buffer,
211*07fb1d06SElliott Hughes size_t* length) {
212*07fb1d06SElliott Hughes // Initilize only once and reuse.
213*07fb1d06SElliott Hughes if (!initialized_) {
214*07fb1d06SElliott Hughes // Only resizing the arrays needed.
215*07fb1d06SElliott Hughes code_lens_.resize(19);
216*07fb1d06SElliott Hughes code_hcodes_.resize(1 << 7);
217*07fb1d06SElliott Hughes
218*07fb1d06SElliott Hughes lit_len_lens_.resize(286);
219*07fb1d06SElliott Hughes lit_len_hcodes_.resize(1 << 15);
220*07fb1d06SElliott Hughes
221*07fb1d06SElliott Hughes distance_lens_.resize(30);
222*07fb1d06SElliott Hughes distance_hcodes_.resize(1 << 15);
223*07fb1d06SElliott Hughes
224*07fb1d06SElliott Hughes // 286: Maximum number of literal/lengths symbols.
225*07fb1d06SElliott Hughes // 30: Maximum number of distance symbols.
226*07fb1d06SElliott Hughes // The reason we reserve this to the sum of both maximum sizes is that we
227*07fb1d06SElliott Hughes // need to calculate both huffman codes contiguously. See b/72815313.
228*07fb1d06SElliott Hughes tmp_lens_.resize(286 + 30);
229*07fb1d06SElliott Hughes initialized_ = true;
230*07fb1d06SElliott Hughes }
231*07fb1d06SElliott Hughes
232*07fb1d06SElliott Hughes // Read the header. Reads the first portion of the Huffman data from input and
233*07fb1d06SElliott Hughes // writes it into the puff |buffer|. The first portion includes the size
234*07fb1d06SElliott Hughes // (|num_lit_len|) of the literals/lengths Huffman code length array
235*07fb1d06SElliott Hughes // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
236*07fb1d06SElliott Hughes // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
237*07fb1d06SElliott Hughes // Huffman code length array (dynamic_code_lens_) for reading
238*07fb1d06SElliott Hughes // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
239*07fb1d06SElliott Hughes // reading |dynamic_code_lens_|.
240*07fb1d06SElliott Hughes
241*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(*length >= 3);
242*07fb1d06SElliott Hughes size_t index = 0;
243*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(14));
244*07fb1d06SElliott Hughes buffer[index++] = br->ReadBits(5); // HLIST
245*07fb1d06SElliott Hughes auto num_lit_len = br->ReadBits(5) + 257;
246*07fb1d06SElliott Hughes br->DropBits(5);
247*07fb1d06SElliott Hughes
248*07fb1d06SElliott Hughes buffer[index++] = br->ReadBits(5); // HDIST
249*07fb1d06SElliott Hughes auto num_distance = br->ReadBits(5) + 1;
250*07fb1d06SElliott Hughes br->DropBits(5);
251*07fb1d06SElliott Hughes
252*07fb1d06SElliott Hughes buffer[index++] = br->ReadBits(4); // HCLEN
253*07fb1d06SElliott Hughes auto num_codes = br->ReadBits(4) + 4;
254*07fb1d06SElliott Hughes br->DropBits(4);
255*07fb1d06SElliott Hughes
256*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
257*07fb1d06SElliott Hughes CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
258*07fb1d06SElliott Hughes
259*07fb1d06SElliott Hughes bool checked = false;
260*07fb1d06SElliott Hughes size_t idx = 0;
261*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(*length - index >= (num_codes + 1) / 2);
262*07fb1d06SElliott Hughes // Two codes per byte
263*07fb1d06SElliott Hughes for (; idx < num_codes; idx++) {
264*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(3));
265*07fb1d06SElliott Hughes code_lens_[kPermutations[idx]] = br->ReadBits(3);
266*07fb1d06SElliott Hughes if (checked) {
267*07fb1d06SElliott Hughes buffer[index++] |= br->ReadBits(3);
268*07fb1d06SElliott Hughes } else {
269*07fb1d06SElliott Hughes buffer[index] = br->ReadBits(3) << 4;
270*07fb1d06SElliott Hughes }
271*07fb1d06SElliott Hughes checked = !checked;
272*07fb1d06SElliott Hughes br->DropBits(3);
273*07fb1d06SElliott Hughes }
274*07fb1d06SElliott Hughes // Pad the last byte if odd number of codes.
275*07fb1d06SElliott Hughes if (checked) {
276*07fb1d06SElliott Hughes index++;
277*07fb1d06SElliott Hughes }
278*07fb1d06SElliott Hughes for (; idx < 19; idx++) {
279*07fb1d06SElliott Hughes code_lens_[kPermutations[idx]] = 0;
280*07fb1d06SElliott Hughes }
281*07fb1d06SElliott Hughes
282*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
283*07fb1d06SElliott Hughes BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_));
284*07fb1d06SElliott Hughes
285*07fb1d06SElliott Hughes // Build literals/lengths and distance Huffman code length arrays.
286*07fb1d06SElliott Hughes auto bytes_available = (*length - index);
287*07fb1d06SElliott Hughes tmp_lens_.clear();
288*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
289*07fb1d06SElliott Hughes br, buffer + index, &bytes_available, code_max_bits_,
290*07fb1d06SElliott Hughes num_lit_len + num_distance, &tmp_lens_));
291*07fb1d06SElliott Hughes index += bytes_available;
292*07fb1d06SElliott Hughes
293*07fb1d06SElliott Hughes // TODO(ahassani): Optimize this so the memcpy is not needed anymore.
294*07fb1d06SElliott Hughes lit_len_lens_.clear();
295*07fb1d06SElliott Hughes lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
296*07fb1d06SElliott Hughes tmp_lens_.begin() + num_lit_len);
297*07fb1d06SElliott Hughes
298*07fb1d06SElliott Hughes distance_lens_.clear();
299*07fb1d06SElliott Hughes distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
300*07fb1d06SElliott Hughes tmp_lens_.end());
301*07fb1d06SElliott Hughes
302*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
303*07fb1d06SElliott Hughes BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
304*07fb1d06SElliott Hughes
305*07fb1d06SElliott Hughes // Build distance Huffman codes.
306*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
307*07fb1d06SElliott Hughes &distance_max_bits_));
308*07fb1d06SElliott Hughes
309*07fb1d06SElliott Hughes *length = index;
310*07fb1d06SElliott Hughes return true;
311*07fb1d06SElliott Hughes }
312*07fb1d06SElliott Hughes
BuildHuffmanCodeLengths(BitReaderInterface * br,uint8_t * buffer,size_t * length,size_t max_bits,size_t num_codes,Buffer * lens)313*07fb1d06SElliott Hughes bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
314*07fb1d06SElliott Hughes uint8_t* buffer,
315*07fb1d06SElliott Hughes size_t* length,
316*07fb1d06SElliott Hughes size_t max_bits,
317*07fb1d06SElliott Hughes size_t num_codes,
318*07fb1d06SElliott Hughes Buffer* lens) {
319*07fb1d06SElliott Hughes size_t index = 0;
320*07fb1d06SElliott Hughes lens->clear();
321*07fb1d06SElliott Hughes for (size_t idx = 0; idx < num_codes;) {
322*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
323*07fb1d06SElliott Hughes auto bits = br->ReadBits(max_bits);
324*07fb1d06SElliott Hughes uint16_t code;
325*07fb1d06SElliott Hughes size_t nbits;
326*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(CodeAlphabet(bits, &code, &nbits));
327*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(index < *length);
328*07fb1d06SElliott Hughes br->DropBits(nbits);
329*07fb1d06SElliott Hughes if (code < 16) {
330*07fb1d06SElliott Hughes buffer[index++] = code;
331*07fb1d06SElliott Hughes lens->push_back(code);
332*07fb1d06SElliott Hughes idx++;
333*07fb1d06SElliott Hughes } else {
334*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(code < 19);
335*07fb1d06SElliott Hughes size_t copy_num = 0;
336*07fb1d06SElliott Hughes uint8_t copy_val;
337*07fb1d06SElliott Hughes switch (code) {
338*07fb1d06SElliott Hughes case 16:
339*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(idx != 0);
340*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(2));
341*07fb1d06SElliott Hughes copy_num = 3 + br->ReadBits(2);
342*07fb1d06SElliott Hughes buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times
343*07fb1d06SElliott Hughes copy_val = (*lens)[idx - 1];
344*07fb1d06SElliott Hughes br->DropBits(2);
345*07fb1d06SElliott Hughes break;
346*07fb1d06SElliott Hughes
347*07fb1d06SElliott Hughes case 17:
348*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(3));
349*07fb1d06SElliott Hughes copy_num = 3 + br->ReadBits(3);
350*07fb1d06SElliott Hughes buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times
351*07fb1d06SElliott Hughes copy_val = 0;
352*07fb1d06SElliott Hughes br->DropBits(3);
353*07fb1d06SElliott Hughes break;
354*07fb1d06SElliott Hughes
355*07fb1d06SElliott Hughes case 18:
356*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(br->CacheBits(7));
357*07fb1d06SElliott Hughes copy_num = 11 + br->ReadBits(7);
358*07fb1d06SElliott Hughes buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times
359*07fb1d06SElliott Hughes copy_val = 0;
360*07fb1d06SElliott Hughes br->DropBits(7);
361*07fb1d06SElliott Hughes break;
362*07fb1d06SElliott Hughes
363*07fb1d06SElliott Hughes default:
364*07fb1d06SElliott Hughes LOG(ERROR) << "Invalid code!";
365*07fb1d06SElliott Hughes return false;
366*07fb1d06SElliott Hughes }
367*07fb1d06SElliott Hughes idx += copy_num;
368*07fb1d06SElliott Hughes while (copy_num--) {
369*07fb1d06SElliott Hughes lens->push_back(copy_val);
370*07fb1d06SElliott Hughes }
371*07fb1d06SElliott Hughes }
372*07fb1d06SElliott Hughes }
373*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(lens->size() == num_codes);
374*07fb1d06SElliott Hughes *length = index;
375*07fb1d06SElliott Hughes return true;
376*07fb1d06SElliott Hughes }
377*07fb1d06SElliott Hughes
BuildDynamicHuffmanTable(const uint8_t * buffer,size_t length,BitWriterInterface * bw)378*07fb1d06SElliott Hughes bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
379*07fb1d06SElliott Hughes size_t length,
380*07fb1d06SElliott Hughes BitWriterInterface* bw) {
381*07fb1d06SElliott Hughes if (!initialized_) {
382*07fb1d06SElliott Hughes // Only resizing the arrays needed.
383*07fb1d06SElliott Hughes code_lens_.resize(19);
384*07fb1d06SElliott Hughes code_rcodes_.resize(19);
385*07fb1d06SElliott Hughes
386*07fb1d06SElliott Hughes lit_len_lens_.resize(286);
387*07fb1d06SElliott Hughes lit_len_rcodes_.resize(286);
388*07fb1d06SElliott Hughes
389*07fb1d06SElliott Hughes distance_lens_.resize(30);
390*07fb1d06SElliott Hughes distance_rcodes_.resize(30);
391*07fb1d06SElliott Hughes
392*07fb1d06SElliott Hughes tmp_lens_.resize(286 + 30);
393*07fb1d06SElliott Hughes
394*07fb1d06SElliott Hughes initialized_ = true;
395*07fb1d06SElliott Hughes }
396*07fb1d06SElliott Hughes
397*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(length >= 3);
398*07fb1d06SElliott Hughes size_t index = 0;
399*07fb1d06SElliott Hughes // Write the header.
400*07fb1d06SElliott Hughes size_t num_lit_len = buffer[index] + 257;
401*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
402*07fb1d06SElliott Hughes
403*07fb1d06SElliott Hughes size_t num_distance = buffer[index] + 1;
404*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
405*07fb1d06SElliott Hughes
406*07fb1d06SElliott Hughes size_t num_codes = buffer[index] + 4;
407*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(4, buffer[index++]));
408*07fb1d06SElliott Hughes
409*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
410*07fb1d06SElliott Hughes CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
411*07fb1d06SElliott Hughes
412*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(length - index >= (num_codes + 1) / 2);
413*07fb1d06SElliott Hughes bool checked = false;
414*07fb1d06SElliott Hughes size_t idx = 0;
415*07fb1d06SElliott Hughes for (; idx < num_codes; idx++) {
416*07fb1d06SElliott Hughes uint8_t len;
417*07fb1d06SElliott Hughes if (checked) {
418*07fb1d06SElliott Hughes len = buffer[index++] & 0x0F;
419*07fb1d06SElliott Hughes } else {
420*07fb1d06SElliott Hughes len = buffer[index] >> 4;
421*07fb1d06SElliott Hughes }
422*07fb1d06SElliott Hughes checked = !checked;
423*07fb1d06SElliott Hughes code_lens_[kPermutations[idx]] = len;
424*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(3, len));
425*07fb1d06SElliott Hughes }
426*07fb1d06SElliott Hughes if (checked) {
427*07fb1d06SElliott Hughes index++;
428*07fb1d06SElliott Hughes }
429*07fb1d06SElliott Hughes for (; idx < 19; idx++) {
430*07fb1d06SElliott Hughes code_lens_[kPermutations[idx]] = 0;
431*07fb1d06SElliott Hughes }
432*07fb1d06SElliott Hughes
433*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
434*07fb1d06SElliott Hughes BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_));
435*07fb1d06SElliott Hughes
436*07fb1d06SElliott Hughes // Build literal/lengths and distance Huffman code length arrays.
437*07fb1d06SElliott Hughes auto bytes_available = length - index;
438*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(
439*07fb1d06SElliott Hughes BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
440*07fb1d06SElliott Hughes num_lit_len + num_distance, &tmp_lens_));
441*07fb1d06SElliott Hughes index += bytes_available;
442*07fb1d06SElliott Hughes
443*07fb1d06SElliott Hughes lit_len_lens_.clear();
444*07fb1d06SElliott Hughes lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
445*07fb1d06SElliott Hughes tmp_lens_.begin() + num_lit_len);
446*07fb1d06SElliott Hughes
447*07fb1d06SElliott Hughes distance_lens_.clear();
448*07fb1d06SElliott Hughes distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
449*07fb1d06SElliott Hughes tmp_lens_.end());
450*07fb1d06SElliott Hughes
451*07fb1d06SElliott Hughes // Build literal/lengths Huffman reverse codes.
452*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
453*07fb1d06SElliott Hughes lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
454*07fb1d06SElliott Hughes
455*07fb1d06SElliott Hughes // Build distance Huffman reverse codes.
456*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
457*07fb1d06SElliott Hughes distance_lens_, &distance_rcodes_, &distance_max_bits_));
458*07fb1d06SElliott Hughes
459*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(length == index);
460*07fb1d06SElliott Hughes
461*07fb1d06SElliott Hughes return true;
462*07fb1d06SElliott Hughes }
463*07fb1d06SElliott Hughes
BuildHuffmanCodeLengths(const uint8_t * buffer,size_t * length,BitWriterInterface * bw,size_t num_codes,Buffer * lens)464*07fb1d06SElliott Hughes bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
465*07fb1d06SElliott Hughes size_t* length,
466*07fb1d06SElliott Hughes BitWriterInterface* bw,
467*07fb1d06SElliott Hughes size_t num_codes,
468*07fb1d06SElliott Hughes Buffer* lens) {
469*07fb1d06SElliott Hughes lens->clear();
470*07fb1d06SElliott Hughes uint16_t hcode;
471*07fb1d06SElliott Hughes size_t nbits;
472*07fb1d06SElliott Hughes size_t index = 0;
473*07fb1d06SElliott Hughes for (size_t idx = 0; idx < num_codes;) {
474*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(index < *length);
475*07fb1d06SElliott Hughes auto pcode = buffer[index++];
476*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(pcode <= 155);
477*07fb1d06SElliott Hughes
478*07fb1d06SElliott Hughes auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
479*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(CodeHuffman(code, &hcode, &nbits));
480*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(nbits, hcode));
481*07fb1d06SElliott Hughes if (code < 16) {
482*07fb1d06SElliott Hughes lens->push_back(code);
483*07fb1d06SElliott Hughes idx++;
484*07fb1d06SElliott Hughes } else {
485*07fb1d06SElliott Hughes size_t copy_num = 0;
486*07fb1d06SElliott Hughes uint8_t copy_val;
487*07fb1d06SElliott Hughes switch (code) {
488*07fb1d06SElliott Hughes case 16:
489*07fb1d06SElliott Hughes // Cannot repeat a non-existent last code if idx == 0.
490*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(idx != 0);
491*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(2, pcode - 16));
492*07fb1d06SElliott Hughes copy_num = 3 + pcode - 16;
493*07fb1d06SElliott Hughes copy_val = (*lens)[idx - 1];
494*07fb1d06SElliott Hughes break;
495*07fb1d06SElliott Hughes
496*07fb1d06SElliott Hughes case 17:
497*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(3, pcode - 20));
498*07fb1d06SElliott Hughes copy_num = 3 + pcode - 20;
499*07fb1d06SElliott Hughes copy_val = 0;
500*07fb1d06SElliott Hughes break;
501*07fb1d06SElliott Hughes
502*07fb1d06SElliott Hughes case 18:
503*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(bw->WriteBits(7, pcode - 28));
504*07fb1d06SElliott Hughes copy_num = 11 + pcode - 28;
505*07fb1d06SElliott Hughes copy_val = 0;
506*07fb1d06SElliott Hughes break;
507*07fb1d06SElliott Hughes
508*07fb1d06SElliott Hughes default:
509*07fb1d06SElliott Hughes break;
510*07fb1d06SElliott Hughes }
511*07fb1d06SElliott Hughes idx += copy_num;
512*07fb1d06SElliott Hughes while (copy_num--) {
513*07fb1d06SElliott Hughes lens->push_back(copy_val);
514*07fb1d06SElliott Hughes }
515*07fb1d06SElliott Hughes }
516*07fb1d06SElliott Hughes }
517*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(lens->size() == num_codes);
518*07fb1d06SElliott Hughes *length = index;
519*07fb1d06SElliott Hughes return true;
520*07fb1d06SElliott Hughes }
521*07fb1d06SElliott Hughes
BlockTypeToString(BlockType type)522*07fb1d06SElliott Hughes string BlockTypeToString(BlockType type) {
523*07fb1d06SElliott Hughes switch (type) {
524*07fb1d06SElliott Hughes case BlockType::kUncompressed:
525*07fb1d06SElliott Hughes return "Uncompressed";
526*07fb1d06SElliott Hughes
527*07fb1d06SElliott Hughes case BlockType::kFixed:
528*07fb1d06SElliott Hughes return "Fixed";
529*07fb1d06SElliott Hughes
530*07fb1d06SElliott Hughes case BlockType::kDynamic:
531*07fb1d06SElliott Hughes return "Dynamic";
532*07fb1d06SElliott Hughes
533*07fb1d06SElliott Hughes default:
534*07fb1d06SElliott Hughes return "Unknown";
535*07fb1d06SElliott Hughes }
536*07fb1d06SElliott Hughes }
537*07fb1d06SElliott Hughes
538*07fb1d06SElliott Hughes } // namespace puffin
539