xref: /aosp_15_r20/external/puffin/src/puffer.cc (revision 07fb1d065b7cfb4729786fadd42a612532d2f466)
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "puffin/src/include/puffin/puffer.h"
6 
7 #include <memory>
8 #include <string>
9 #include <vector>
10 
11 #include "puffin/src/bit_reader.h"
12 #include "puffin/src/huffman_table.h"
13 #include "puffin/src/include/puffin/common.h"
14 #include "puffin/src/logging.h"
15 #include "puffin/src/puff_data.h"
16 #include "puffin/src/puff_writer.h"
17 
18 using std::string;
19 using std::vector;
20 
21 namespace puffin {
22 
Puffer(bool exclude_bad_distance_caches)23 Puffer::Puffer(bool exclude_bad_distance_caches)
24     : dyn_ht_(new HuffmanTable()),
25       fix_ht_(new HuffmanTable()),
26       exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
27 
Puffer()28 Puffer::Puffer() : Puffer(false) {}
29 
~Puffer()30 Puffer::~Puffer() {}
31 
PuffDeflate(BitReaderInterface * br,PuffWriterInterface * pw,vector<BitExtent> * deflates) const32 bool Puffer::PuffDeflate(BitReaderInterface* br,
33                          PuffWriterInterface* pw,
34                          vector<BitExtent>* deflates) const {
35   PuffData pd;
36   HuffmanTable* cur_ht;
37   bool end_loop = false;
38   // No bits left to read, return. We try to cache at least eight bits because
39   // the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
40   // bits header + 5 bits just one len/dist symbol.
41   while (!end_loop && br->CacheBits(8)) {
42     auto start_bit_offset = br->OffsetInBits();
43 
44     TEST_AND_RETURN_FALSE(br->CacheBits(3));
45     uint8_t final_bit = br->ReadBits(1);  // BFINAL
46     br->DropBits(1);
47     uint8_t type = br->ReadBits(2);  // BTYPE
48     br->DropBits(2);
49     DVLOG(2) << "Read block type: "
50              << BlockTypeToString(static_cast<BlockType>(type));
51 
52     // If it is the final block and we are just looking for deflate locations,
53     // we consider this the end of the search.
54     if (deflates != nullptr && final_bit) {
55       end_loop = true;
56     }
57 
58     // Header structure
59     // +-+-+-+-+-+-+-+-+
60     // |F| TP|   SKIP  |
61     // +-+-+-+-+-+-+-+-+
62     // F -> final_bit
63     // TP -> type
64     // SKIP -> skipped_bits (only in kUncompressed type)
65     auto block_header = (final_bit << 7) | (type << 5);
66     switch (static_cast<BlockType>(type)) {
67       case BlockType::kUncompressed: {
68         auto skipped_bits = br->ReadBoundaryBits();
69         br->SkipBoundaryBits();
70         TEST_AND_RETURN_FALSE(br->CacheBits(32));
71         auto len = br->ReadBits(16);  // LEN
72         br->DropBits(16);
73         auto nlen = br->ReadBits(16);  // NLEN
74         br->DropBits(16);
75 
76         if ((len ^ nlen) != 0xFFFF) {
77           LOG(ERROR) << "Length of uncompressed data is invalid;"
78                      << " LEN(" << len << ") NLEN(" << nlen << ")";
79           return false;
80         }
81 
82         // Put skipped bits into header.
83         block_header |= skipped_bits;
84 
85         // Insert block header.
86         pd.type = PuffData::Type::kBlockMetadata;
87         pd.block_metadata[0] = block_header;
88         pd.length = 1;
89         TEST_AND_RETURN_FALSE(pw->Insert(pd));
90 
91         // Insert all the raw literals.
92         pd.type = PuffData::Type::kLiterals;
93         pd.length = len;
94         TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
95         TEST_AND_RETURN_FALSE(pw->Insert(pd));
96 
97         pd.type = PuffData::Type::kEndOfBlock;
98         TEST_AND_RETURN_FALSE(pw->Insert(pd));
99 
100         // There is no need to insert the location of uncompressed deflates
101         // because we do not want the uncompressed blocks when trying to find
102         // the bit-addressed location of deflates. They better be ignored.
103 
104         // continue the loop. Do not read any literal/length/distance.
105         continue;
106       }
107 
108       case BlockType::kFixed:
109         fix_ht_->BuildFixedHuffmanTable();
110         cur_ht = fix_ht_.get();
111         pd.type = PuffData::Type::kBlockMetadata;
112         pd.block_metadata[0] = block_header;
113         pd.length = 1;
114         TEST_AND_RETURN_FALSE(pw->Insert(pd));
115         break;
116 
117       case BlockType::kDynamic:
118         pd.type = PuffData::Type::kBlockMetadata;
119         pd.block_metadata[0] = block_header;
120         pd.length = sizeof(pd.block_metadata) - 1;
121         TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
122             br, &pd.block_metadata[1], &pd.length));
123         pd.length += 1;  // For the header.
124         TEST_AND_RETURN_FALSE(pw->Insert(pd));
125         cur_ht = dyn_ht_.get();
126         break;
127 
128       default:
129         LOG(ERROR) << "Invalid block compression type: "
130                    << static_cast<int>(type);
131         return false;
132     }
133 
134     // If true and the list of output |deflates| is non-null, the current
135     // deflate location will be added to that list.
136     bool include_deflate = true;
137 
138     while (true) {  // Breaks when the end of block is reached.
139       auto max_bits = cur_ht->LitLenMaxBits();
140       if (!br->CacheBits(max_bits)) {
141         // It could be the end of buffer and the bit length of the end_of_block
142         // symbol has less than maximum bit length of current Huffman table. So
143         // only asking for the size of end of block symbol (256).
144         TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
145       }
146       TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
147       auto bits = br->ReadBits(max_bits);
148       uint16_t lit_len_alphabet;
149       size_t nbits;
150       TEST_AND_RETURN_FALSE(
151           cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
152       br->DropBits(nbits);
153       if (lit_len_alphabet < 256) {
154         pd.type = PuffData::Type::kLiteral;
155         pd.byte = lit_len_alphabet;
156         TEST_AND_RETURN_FALSE(pw->Insert(pd));
157 
158       } else if (256 == lit_len_alphabet) {
159         pd.type = PuffData::Type::kEndOfBlock;
160         TEST_AND_RETURN_FALSE(pw->Insert(pd));
161         if (deflates != nullptr && include_deflate) {
162           deflates->emplace_back(start_bit_offset,
163                                  br->OffsetInBits() - start_bit_offset);
164         }
165         break;  // Breaks the loop.
166       } else {
167         TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
168         // Reading length.
169         auto len_code_start = lit_len_alphabet - 257;
170         auto extra_bits_len = kLengthExtraBits[len_code_start];
171         uint16_t extra_bits_value = 0;
172         if (extra_bits_len) {
173           TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
174           extra_bits_value = br->ReadBits(extra_bits_len);
175           br->DropBits(extra_bits_len);
176         }
177         auto length = kLengthBases[len_code_start] + extra_bits_value;
178 
179         auto bits_to_cache = cur_ht->DistanceMaxBits();
180         if (!br->CacheBits(bits_to_cache)) {
181           // This is a corner case that is present in the older versions of the
182           // puffin. So we need to catch it and correctly discard this kind of
183           // deflate when we encounter it. See crbug.com/915559 for more info.
184           bits_to_cache = br->BitsRemaining();
185           TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
186           if (exclude_bad_distance_caches_) {
187             include_deflate = false;
188           }
189           LOG(WARNING) << "A rare condition that older puffin clients fail to"
190                        << " recognize happened. Nothing to worry about."
191                        << " See crbug.com/915559";
192         }
193         auto bits = br->ReadBits(bits_to_cache);
194         uint16_t distance_alphabet;
195         size_t nbits;
196         TEST_AND_RETURN_FALSE(
197             cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
198         br->DropBits(nbits);
199 
200         // Reading distance.
201         extra_bits_len = kDistanceExtraBits[distance_alphabet];
202         extra_bits_value = 0;
203         if (extra_bits_len) {
204           TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
205           extra_bits_value = br->ReadBits(extra_bits_len);
206           br->DropBits(extra_bits_len);
207         }
208 
209         pd.type = PuffData::Type::kLenDist;
210         pd.length = length;
211         pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
212         TEST_AND_RETURN_FALSE(pw->Insert(pd));
213       }
214     }
215   }
216   TEST_AND_RETURN_FALSE(pw->Flush());
217   return true;
218 }
219 
220 }  // namespace puffin
221