xref: /aosp_15_r20/external/puffin/src/puffer.cc (revision 07fb1d065b7cfb4729786fadd42a612532d2f466)
1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors
2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be
3*07fb1d06SElliott Hughes // found in the LICENSE file.
4*07fb1d06SElliott Hughes 
5*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/puffer.h"
6*07fb1d06SElliott Hughes 
7*07fb1d06SElliott Hughes #include <memory>
8*07fb1d06SElliott Hughes #include <string>
9*07fb1d06SElliott Hughes #include <vector>
10*07fb1d06SElliott Hughes 
11*07fb1d06SElliott Hughes #include "puffin/src/bit_reader.h"
12*07fb1d06SElliott Hughes #include "puffin/src/huffman_table.h"
13*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/common.h"
14*07fb1d06SElliott Hughes #include "puffin/src/logging.h"
15*07fb1d06SElliott Hughes #include "puffin/src/puff_data.h"
16*07fb1d06SElliott Hughes #include "puffin/src/puff_writer.h"
17*07fb1d06SElliott Hughes 
18*07fb1d06SElliott Hughes using std::string;
19*07fb1d06SElliott Hughes using std::vector;
20*07fb1d06SElliott Hughes 
21*07fb1d06SElliott Hughes namespace puffin {
22*07fb1d06SElliott Hughes 
Puffer(bool exclude_bad_distance_caches)23*07fb1d06SElliott Hughes Puffer::Puffer(bool exclude_bad_distance_caches)
24*07fb1d06SElliott Hughes     : dyn_ht_(new HuffmanTable()),
25*07fb1d06SElliott Hughes       fix_ht_(new HuffmanTable()),
26*07fb1d06SElliott Hughes       exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
27*07fb1d06SElliott Hughes 
Puffer()28*07fb1d06SElliott Hughes Puffer::Puffer() : Puffer(false) {}
29*07fb1d06SElliott Hughes 
~Puffer()30*07fb1d06SElliott Hughes Puffer::~Puffer() {}
31*07fb1d06SElliott Hughes 
PuffDeflate(BitReaderInterface * br,PuffWriterInterface * pw,vector<BitExtent> * deflates) const32*07fb1d06SElliott Hughes bool Puffer::PuffDeflate(BitReaderInterface* br,
33*07fb1d06SElliott Hughes                          PuffWriterInterface* pw,
34*07fb1d06SElliott Hughes                          vector<BitExtent>* deflates) const {
35*07fb1d06SElliott Hughes   PuffData pd;
36*07fb1d06SElliott Hughes   HuffmanTable* cur_ht;
37*07fb1d06SElliott Hughes   bool end_loop = false;
38*07fb1d06SElliott Hughes   // No bits left to read, return. We try to cache at least eight bits because
39*07fb1d06SElliott Hughes   // the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
40*07fb1d06SElliott Hughes   // bits header + 5 bits just one len/dist symbol.
41*07fb1d06SElliott Hughes   while (!end_loop && br->CacheBits(8)) {
42*07fb1d06SElliott Hughes     auto start_bit_offset = br->OffsetInBits();
43*07fb1d06SElliott Hughes 
44*07fb1d06SElliott Hughes     TEST_AND_RETURN_FALSE(br->CacheBits(3));
45*07fb1d06SElliott Hughes     uint8_t final_bit = br->ReadBits(1);  // BFINAL
46*07fb1d06SElliott Hughes     br->DropBits(1);
47*07fb1d06SElliott Hughes     uint8_t type = br->ReadBits(2);  // BTYPE
48*07fb1d06SElliott Hughes     br->DropBits(2);
49*07fb1d06SElliott Hughes     DVLOG(2) << "Read block type: "
50*07fb1d06SElliott Hughes              << BlockTypeToString(static_cast<BlockType>(type));
51*07fb1d06SElliott Hughes 
52*07fb1d06SElliott Hughes     // If it is the final block and we are just looking for deflate locations,
53*07fb1d06SElliott Hughes     // we consider this the end of the search.
54*07fb1d06SElliott Hughes     if (deflates != nullptr && final_bit) {
55*07fb1d06SElliott Hughes       end_loop = true;
56*07fb1d06SElliott Hughes     }
57*07fb1d06SElliott Hughes 
58*07fb1d06SElliott Hughes     // Header structure
59*07fb1d06SElliott Hughes     // +-+-+-+-+-+-+-+-+
60*07fb1d06SElliott Hughes     // |F| TP|   SKIP  |
61*07fb1d06SElliott Hughes     // +-+-+-+-+-+-+-+-+
62*07fb1d06SElliott Hughes     // F -> final_bit
63*07fb1d06SElliott Hughes     // TP -> type
64*07fb1d06SElliott Hughes     // SKIP -> skipped_bits (only in kUncompressed type)
65*07fb1d06SElliott Hughes     auto block_header = (final_bit << 7) | (type << 5);
66*07fb1d06SElliott Hughes     switch (static_cast<BlockType>(type)) {
67*07fb1d06SElliott Hughes       case BlockType::kUncompressed: {
68*07fb1d06SElliott Hughes         auto skipped_bits = br->ReadBoundaryBits();
69*07fb1d06SElliott Hughes         br->SkipBoundaryBits();
70*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(br->CacheBits(32));
71*07fb1d06SElliott Hughes         auto len = br->ReadBits(16);  // LEN
72*07fb1d06SElliott Hughes         br->DropBits(16);
73*07fb1d06SElliott Hughes         auto nlen = br->ReadBits(16);  // NLEN
74*07fb1d06SElliott Hughes         br->DropBits(16);
75*07fb1d06SElliott Hughes 
76*07fb1d06SElliott Hughes         if ((len ^ nlen) != 0xFFFF) {
77*07fb1d06SElliott Hughes           LOG(ERROR) << "Length of uncompressed data is invalid;"
78*07fb1d06SElliott Hughes                      << " LEN(" << len << ") NLEN(" << nlen << ")";
79*07fb1d06SElliott Hughes           return false;
80*07fb1d06SElliott Hughes         }
81*07fb1d06SElliott Hughes 
82*07fb1d06SElliott Hughes         // Put skipped bits into header.
83*07fb1d06SElliott Hughes         block_header |= skipped_bits;
84*07fb1d06SElliott Hughes 
85*07fb1d06SElliott Hughes         // Insert block header.
86*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kBlockMetadata;
87*07fb1d06SElliott Hughes         pd.block_metadata[0] = block_header;
88*07fb1d06SElliott Hughes         pd.length = 1;
89*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
90*07fb1d06SElliott Hughes 
91*07fb1d06SElliott Hughes         // Insert all the raw literals.
92*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kLiterals;
93*07fb1d06SElliott Hughes         pd.length = len;
94*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
95*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
96*07fb1d06SElliott Hughes 
97*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kEndOfBlock;
98*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
99*07fb1d06SElliott Hughes 
100*07fb1d06SElliott Hughes         // There is no need to insert the location of uncompressed deflates
101*07fb1d06SElliott Hughes         // because we do not want the uncompressed blocks when trying to find
102*07fb1d06SElliott Hughes         // the bit-addressed location of deflates. They better be ignored.
103*07fb1d06SElliott Hughes 
104*07fb1d06SElliott Hughes         // continue the loop. Do not read any literal/length/distance.
105*07fb1d06SElliott Hughes         continue;
106*07fb1d06SElliott Hughes       }
107*07fb1d06SElliott Hughes 
108*07fb1d06SElliott Hughes       case BlockType::kFixed:
109*07fb1d06SElliott Hughes         fix_ht_->BuildFixedHuffmanTable();
110*07fb1d06SElliott Hughes         cur_ht = fix_ht_.get();
111*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kBlockMetadata;
112*07fb1d06SElliott Hughes         pd.block_metadata[0] = block_header;
113*07fb1d06SElliott Hughes         pd.length = 1;
114*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
115*07fb1d06SElliott Hughes         break;
116*07fb1d06SElliott Hughes 
117*07fb1d06SElliott Hughes       case BlockType::kDynamic:
118*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kBlockMetadata;
119*07fb1d06SElliott Hughes         pd.block_metadata[0] = block_header;
120*07fb1d06SElliott Hughes         pd.length = sizeof(pd.block_metadata) - 1;
121*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
122*07fb1d06SElliott Hughes             br, &pd.block_metadata[1], &pd.length));
123*07fb1d06SElliott Hughes         pd.length += 1;  // For the header.
124*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
125*07fb1d06SElliott Hughes         cur_ht = dyn_ht_.get();
126*07fb1d06SElliott Hughes         break;
127*07fb1d06SElliott Hughes 
128*07fb1d06SElliott Hughes       default:
129*07fb1d06SElliott Hughes         LOG(ERROR) << "Invalid block compression type: "
130*07fb1d06SElliott Hughes                    << static_cast<int>(type);
131*07fb1d06SElliott Hughes         return false;
132*07fb1d06SElliott Hughes     }
133*07fb1d06SElliott Hughes 
134*07fb1d06SElliott Hughes     // If true and the list of output |deflates| is non-null, the current
135*07fb1d06SElliott Hughes     // deflate location will be added to that list.
136*07fb1d06SElliott Hughes     bool include_deflate = true;
137*07fb1d06SElliott Hughes 
138*07fb1d06SElliott Hughes     while (true) {  // Breaks when the end of block is reached.
139*07fb1d06SElliott Hughes       auto max_bits = cur_ht->LitLenMaxBits();
140*07fb1d06SElliott Hughes       if (!br->CacheBits(max_bits)) {
141*07fb1d06SElliott Hughes         // It could be the end of buffer and the bit length of the end_of_block
142*07fb1d06SElliott Hughes         // symbol has less than maximum bit length of current Huffman table. So
143*07fb1d06SElliott Hughes         // only asking for the size of end of block symbol (256).
144*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
145*07fb1d06SElliott Hughes       }
146*07fb1d06SElliott Hughes       TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
147*07fb1d06SElliott Hughes       auto bits = br->ReadBits(max_bits);
148*07fb1d06SElliott Hughes       uint16_t lit_len_alphabet;
149*07fb1d06SElliott Hughes       size_t nbits;
150*07fb1d06SElliott Hughes       TEST_AND_RETURN_FALSE(
151*07fb1d06SElliott Hughes           cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
152*07fb1d06SElliott Hughes       br->DropBits(nbits);
153*07fb1d06SElliott Hughes       if (lit_len_alphabet < 256) {
154*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kLiteral;
155*07fb1d06SElliott Hughes         pd.byte = lit_len_alphabet;
156*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
157*07fb1d06SElliott Hughes 
158*07fb1d06SElliott Hughes       } else if (256 == lit_len_alphabet) {
159*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kEndOfBlock;
160*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
161*07fb1d06SElliott Hughes         if (deflates != nullptr && include_deflate) {
162*07fb1d06SElliott Hughes           deflates->emplace_back(start_bit_offset,
163*07fb1d06SElliott Hughes                                  br->OffsetInBits() - start_bit_offset);
164*07fb1d06SElliott Hughes         }
165*07fb1d06SElliott Hughes         break;  // Breaks the loop.
166*07fb1d06SElliott Hughes       } else {
167*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
168*07fb1d06SElliott Hughes         // Reading length.
169*07fb1d06SElliott Hughes         auto len_code_start = lit_len_alphabet - 257;
170*07fb1d06SElliott Hughes         auto extra_bits_len = kLengthExtraBits[len_code_start];
171*07fb1d06SElliott Hughes         uint16_t extra_bits_value = 0;
172*07fb1d06SElliott Hughes         if (extra_bits_len) {
173*07fb1d06SElliott Hughes           TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
174*07fb1d06SElliott Hughes           extra_bits_value = br->ReadBits(extra_bits_len);
175*07fb1d06SElliott Hughes           br->DropBits(extra_bits_len);
176*07fb1d06SElliott Hughes         }
177*07fb1d06SElliott Hughes         auto length = kLengthBases[len_code_start] + extra_bits_value;
178*07fb1d06SElliott Hughes 
179*07fb1d06SElliott Hughes         auto bits_to_cache = cur_ht->DistanceMaxBits();
180*07fb1d06SElliott Hughes         if (!br->CacheBits(bits_to_cache)) {
181*07fb1d06SElliott Hughes           // This is a corner case that is present in the older versions of the
182*07fb1d06SElliott Hughes           // puffin. So we need to catch it and correctly discard this kind of
183*07fb1d06SElliott Hughes           // deflate when we encounter it. See crbug.com/915559 for more info.
184*07fb1d06SElliott Hughes           bits_to_cache = br->BitsRemaining();
185*07fb1d06SElliott Hughes           TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
186*07fb1d06SElliott Hughes           if (exclude_bad_distance_caches_) {
187*07fb1d06SElliott Hughes             include_deflate = false;
188*07fb1d06SElliott Hughes           }
189*07fb1d06SElliott Hughes           LOG(WARNING) << "A rare condition that older puffin clients fail to"
190*07fb1d06SElliott Hughes                        << " recognize happened. Nothing to worry about."
191*07fb1d06SElliott Hughes                        << " See crbug.com/915559";
192*07fb1d06SElliott Hughes         }
193*07fb1d06SElliott Hughes         auto bits = br->ReadBits(bits_to_cache);
194*07fb1d06SElliott Hughes         uint16_t distance_alphabet;
195*07fb1d06SElliott Hughes         size_t nbits;
196*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(
197*07fb1d06SElliott Hughes             cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
198*07fb1d06SElliott Hughes         br->DropBits(nbits);
199*07fb1d06SElliott Hughes 
200*07fb1d06SElliott Hughes         // Reading distance.
201*07fb1d06SElliott Hughes         extra_bits_len = kDistanceExtraBits[distance_alphabet];
202*07fb1d06SElliott Hughes         extra_bits_value = 0;
203*07fb1d06SElliott Hughes         if (extra_bits_len) {
204*07fb1d06SElliott Hughes           TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
205*07fb1d06SElliott Hughes           extra_bits_value = br->ReadBits(extra_bits_len);
206*07fb1d06SElliott Hughes           br->DropBits(extra_bits_len);
207*07fb1d06SElliott Hughes         }
208*07fb1d06SElliott Hughes 
209*07fb1d06SElliott Hughes         pd.type = PuffData::Type::kLenDist;
210*07fb1d06SElliott Hughes         pd.length = length;
211*07fb1d06SElliott Hughes         pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
212*07fb1d06SElliott Hughes         TEST_AND_RETURN_FALSE(pw->Insert(pd));
213*07fb1d06SElliott Hughes       }
214*07fb1d06SElliott Hughes     }
215*07fb1d06SElliott Hughes   }
216*07fb1d06SElliott Hughes   TEST_AND_RETURN_FALSE(pw->Flush());
217*07fb1d06SElliott Hughes   return true;
218*07fb1d06SElliott Hughes }
219*07fb1d06SElliott Hughes 
220*07fb1d06SElliott Hughes }  // namespace puffin
221