xref: /aosp_15_r20/external/cronet/net/extras/preload_data/decoder.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2018 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "net/extras/preload_data/decoder.h"
6*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
7*6777b538SAndroid Build Coastguard Worker #include "base/notreached.h"
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker namespace net::extras {
10*6777b538SAndroid Build Coastguard Worker 
BitReader(const uint8_t * bytes,size_t num_bits)11*6777b538SAndroid Build Coastguard Worker PreloadDecoder::BitReader::BitReader(const uint8_t* bytes, size_t num_bits)
12*6777b538SAndroid Build Coastguard Worker     : bytes_(bytes), num_bits_(num_bits), num_bytes_((num_bits + 7) / 8) {}
13*6777b538SAndroid Build Coastguard Worker 
14*6777b538SAndroid Build Coastguard Worker // Next sets |*out| to the next bit from the input. It returns false if no
15*6777b538SAndroid Build Coastguard Worker // more bits are available or true otherwise.
Next(bool * out)16*6777b538SAndroid Build Coastguard Worker bool PreloadDecoder::BitReader::Next(bool* out) {
17*6777b538SAndroid Build Coastguard Worker   if (num_bits_used_ == 8) {
18*6777b538SAndroid Build Coastguard Worker     if (current_byte_index_ >= num_bytes_) {
19*6777b538SAndroid Build Coastguard Worker       return false;
20*6777b538SAndroid Build Coastguard Worker     }
21*6777b538SAndroid Build Coastguard Worker     current_byte_ = bytes_[current_byte_index_++];
22*6777b538SAndroid Build Coastguard Worker     num_bits_used_ = 0;
23*6777b538SAndroid Build Coastguard Worker   }
24*6777b538SAndroid Build Coastguard Worker 
25*6777b538SAndroid Build Coastguard Worker   *out = 1 & (current_byte_ >> (7 - num_bits_used_));
26*6777b538SAndroid Build Coastguard Worker   num_bits_used_++;
27*6777b538SAndroid Build Coastguard Worker   return true;
28*6777b538SAndroid Build Coastguard Worker }
29*6777b538SAndroid Build Coastguard Worker 
30*6777b538SAndroid Build Coastguard Worker // Read sets the |num_bits| least-significant bits of |*out| to the value of
31*6777b538SAndroid Build Coastguard Worker // the next |num_bits| bits from the input. It returns false if there are
32*6777b538SAndroid Build Coastguard Worker // insufficient bits in the input or true otherwise.
Read(unsigned num_bits,uint32_t * out)33*6777b538SAndroid Build Coastguard Worker bool PreloadDecoder::BitReader::Read(unsigned num_bits, uint32_t* out) {
34*6777b538SAndroid Build Coastguard Worker   DCHECK_LE(num_bits, 32u);
35*6777b538SAndroid Build Coastguard Worker 
36*6777b538SAndroid Build Coastguard Worker   uint32_t ret = 0;
37*6777b538SAndroid Build Coastguard Worker   for (unsigned i = 0; i < num_bits; ++i) {
38*6777b538SAndroid Build Coastguard Worker     bool bit;
39*6777b538SAndroid Build Coastguard Worker     if (!Next(&bit)) {
40*6777b538SAndroid Build Coastguard Worker       return false;
41*6777b538SAndroid Build Coastguard Worker     }
42*6777b538SAndroid Build Coastguard Worker     ret |= static_cast<uint32_t>(bit) << (num_bits - 1 - i);
43*6777b538SAndroid Build Coastguard Worker   }
44*6777b538SAndroid Build Coastguard Worker 
45*6777b538SAndroid Build Coastguard Worker   *out = ret;
46*6777b538SAndroid Build Coastguard Worker   return true;
47*6777b538SAndroid Build Coastguard Worker }
48*6777b538SAndroid Build Coastguard Worker 
49*6777b538SAndroid Build Coastguard Worker namespace {
50*6777b538SAndroid Build Coastguard Worker 
51*6777b538SAndroid Build Coastguard Worker // Reads one bit from |reader|, shifts |*bits| left by 1, and adds the read bit
52*6777b538SAndroid Build Coastguard Worker // to the end of |*bits|.
ReadBit(PreloadDecoder::BitReader * reader,uint8_t * bits)53*6777b538SAndroid Build Coastguard Worker bool ReadBit(PreloadDecoder::BitReader* reader, uint8_t* bits) {
54*6777b538SAndroid Build Coastguard Worker   bool bit;
55*6777b538SAndroid Build Coastguard Worker   if (!reader->Next(&bit)) {
56*6777b538SAndroid Build Coastguard Worker     return false;
57*6777b538SAndroid Build Coastguard Worker   }
58*6777b538SAndroid Build Coastguard Worker   *bits <<= 1;
59*6777b538SAndroid Build Coastguard Worker   if (bit) {
60*6777b538SAndroid Build Coastguard Worker     (*bits)++;
61*6777b538SAndroid Build Coastguard Worker   }
62*6777b538SAndroid Build Coastguard Worker   return true;
63*6777b538SAndroid Build Coastguard Worker }
64*6777b538SAndroid Build Coastguard Worker 
65*6777b538SAndroid Build Coastguard Worker }  // namespace
66*6777b538SAndroid Build Coastguard Worker 
DecodeSize(size_t * out)67*6777b538SAndroid Build Coastguard Worker bool PreloadDecoder::BitReader::DecodeSize(size_t* out) {
68*6777b538SAndroid Build Coastguard Worker   uint8_t bits = 0;
69*6777b538SAndroid Build Coastguard Worker   if (!ReadBit(this, &bits) || !ReadBit(this, &bits)) {
70*6777b538SAndroid Build Coastguard Worker     return false;
71*6777b538SAndroid Build Coastguard Worker   }
72*6777b538SAndroid Build Coastguard Worker   if (bits == 0) {
73*6777b538SAndroid Build Coastguard Worker     *out = 0;
74*6777b538SAndroid Build Coastguard Worker     return true;
75*6777b538SAndroid Build Coastguard Worker   }
76*6777b538SAndroid Build Coastguard Worker   if (!ReadBit(this, &bits)) {
77*6777b538SAndroid Build Coastguard Worker     return false;
78*6777b538SAndroid Build Coastguard Worker   }
79*6777b538SAndroid Build Coastguard Worker   // We've parsed 3 bits so far. Check all possible combinations:
80*6777b538SAndroid Build Coastguard Worker   bool is_even;
81*6777b538SAndroid Build Coastguard Worker   switch (bits) {
82*6777b538SAndroid Build Coastguard Worker     case 0b000:
83*6777b538SAndroid Build Coastguard Worker     case 0b001:
84*6777b538SAndroid Build Coastguard Worker       // This should have been handled in the if (bits == 0) check.
85*6777b538SAndroid Build Coastguard Worker       NOTREACHED();
86*6777b538SAndroid Build Coastguard Worker       return false;
87*6777b538SAndroid Build Coastguard Worker     case 0b010:
88*6777b538SAndroid Build Coastguard Worker       // A specialization of the 0b01 prefix for unary-like even numbers.
89*6777b538SAndroid Build Coastguard Worker       *out = 4;
90*6777b538SAndroid Build Coastguard Worker       return true;
91*6777b538SAndroid Build Coastguard Worker     case 0b011:
92*6777b538SAndroid Build Coastguard Worker       // This will be handled with the prefixes for unary-like encoding below.
93*6777b538SAndroid Build Coastguard Worker       is_even = true;
94*6777b538SAndroid Build Coastguard Worker       break;
95*6777b538SAndroid Build Coastguard Worker     case 0b100:
96*6777b538SAndroid Build Coastguard Worker       *out = 1;
97*6777b538SAndroid Build Coastguard Worker       return true;
98*6777b538SAndroid Build Coastguard Worker     case 0b101:
99*6777b538SAndroid Build Coastguard Worker       *out = 2;
100*6777b538SAndroid Build Coastguard Worker       return true;
101*6777b538SAndroid Build Coastguard Worker     case 0b110:
102*6777b538SAndroid Build Coastguard Worker       *out = 3;
103*6777b538SAndroid Build Coastguard Worker       return true;
104*6777b538SAndroid Build Coastguard Worker     case 0b111:
105*6777b538SAndroid Build Coastguard Worker       // This will be handled with the prefixes for unary-like encoding below.
106*6777b538SAndroid Build Coastguard Worker       is_even = false;
107*6777b538SAndroid Build Coastguard Worker       break;
108*6777b538SAndroid Build Coastguard Worker     default:
109*6777b538SAndroid Build Coastguard Worker       // All cases should be covered above.
110*6777b538SAndroid Build Coastguard Worker       NOTREACHED();
111*6777b538SAndroid Build Coastguard Worker       return false;
112*6777b538SAndroid Build Coastguard Worker   }
113*6777b538SAndroid Build Coastguard Worker   size_t bit_length = 3;
114*6777b538SAndroid Build Coastguard Worker   while (true) {
115*6777b538SAndroid Build Coastguard Worker     bit_length++;
116*6777b538SAndroid Build Coastguard Worker     bool bit;
117*6777b538SAndroid Build Coastguard Worker     if (!Next(&bit)) {
118*6777b538SAndroid Build Coastguard Worker       return false;
119*6777b538SAndroid Build Coastguard Worker     }
120*6777b538SAndroid Build Coastguard Worker     if (!bit) {
121*6777b538SAndroid Build Coastguard Worker       break;
122*6777b538SAndroid Build Coastguard Worker     }
123*6777b538SAndroid Build Coastguard Worker   }
124*6777b538SAndroid Build Coastguard Worker   size_t ret = (bit_length - 2) * 2;
125*6777b538SAndroid Build Coastguard Worker   if (!is_even) {
126*6777b538SAndroid Build Coastguard Worker     ret--;
127*6777b538SAndroid Build Coastguard Worker   }
128*6777b538SAndroid Build Coastguard Worker   *out = ret;
129*6777b538SAndroid Build Coastguard Worker   return true;
130*6777b538SAndroid Build Coastguard Worker }
131*6777b538SAndroid Build Coastguard Worker 
132*6777b538SAndroid Build Coastguard Worker // Seek sets the current offest in the input to bit number |offset|. It
133*6777b538SAndroid Build Coastguard Worker // returns true if |offset| is within the range of the input and false
134*6777b538SAndroid Build Coastguard Worker // otherwise.
Seek(size_t offset)135*6777b538SAndroid Build Coastguard Worker bool PreloadDecoder::BitReader::Seek(size_t offset) {
136*6777b538SAndroid Build Coastguard Worker   if (offset >= num_bits_) {
137*6777b538SAndroid Build Coastguard Worker     return false;
138*6777b538SAndroid Build Coastguard Worker   }
139*6777b538SAndroid Build Coastguard Worker   current_byte_index_ = offset / 8;
140*6777b538SAndroid Build Coastguard Worker   current_byte_ = bytes_[current_byte_index_++];
141*6777b538SAndroid Build Coastguard Worker   num_bits_used_ = offset % 8;
142*6777b538SAndroid Build Coastguard Worker   return true;
143*6777b538SAndroid Build Coastguard Worker }
144*6777b538SAndroid Build Coastguard Worker 
HuffmanDecoder(const uint8_t * tree,size_t tree_bytes)145*6777b538SAndroid Build Coastguard Worker PreloadDecoder::HuffmanDecoder::HuffmanDecoder(const uint8_t* tree,
146*6777b538SAndroid Build Coastguard Worker                                                size_t tree_bytes)
147*6777b538SAndroid Build Coastguard Worker     : tree_(tree), tree_bytes_(tree_bytes) {}
148*6777b538SAndroid Build Coastguard Worker 
Decode(PreloadDecoder::BitReader * reader,char * out) const149*6777b538SAndroid Build Coastguard Worker bool PreloadDecoder::HuffmanDecoder::Decode(PreloadDecoder::BitReader* reader,
150*6777b538SAndroid Build Coastguard Worker                                             char* out) const {
151*6777b538SAndroid Build Coastguard Worker   const uint8_t* current = &tree_[tree_bytes_ - 2];
152*6777b538SAndroid Build Coastguard Worker 
153*6777b538SAndroid Build Coastguard Worker   for (;;) {
154*6777b538SAndroid Build Coastguard Worker     bool bit;
155*6777b538SAndroid Build Coastguard Worker     if (!reader->Next(&bit)) {
156*6777b538SAndroid Build Coastguard Worker       return false;
157*6777b538SAndroid Build Coastguard Worker     }
158*6777b538SAndroid Build Coastguard Worker 
159*6777b538SAndroid Build Coastguard Worker     uint8_t b = current[bit];
160*6777b538SAndroid Build Coastguard Worker     if (b & 0x80) {
161*6777b538SAndroid Build Coastguard Worker       *out = static_cast<char>(b & 0x7f);
162*6777b538SAndroid Build Coastguard Worker       return true;
163*6777b538SAndroid Build Coastguard Worker     }
164*6777b538SAndroid Build Coastguard Worker 
165*6777b538SAndroid Build Coastguard Worker     unsigned offset = static_cast<unsigned>(b) * 2;
166*6777b538SAndroid Build Coastguard Worker     DCHECK_LT(offset, tree_bytes_);
167*6777b538SAndroid Build Coastguard Worker     if (offset >= tree_bytes_) {
168*6777b538SAndroid Build Coastguard Worker       return false;
169*6777b538SAndroid Build Coastguard Worker     }
170*6777b538SAndroid Build Coastguard Worker 
171*6777b538SAndroid Build Coastguard Worker     current = &tree_[offset];
172*6777b538SAndroid Build Coastguard Worker   }
173*6777b538SAndroid Build Coastguard Worker }
174*6777b538SAndroid Build Coastguard Worker 
PreloadDecoder(const uint8_t * huffman_tree,size_t huffman_tree_size,const uint8_t * trie,size_t trie_bits,size_t trie_root_position)175*6777b538SAndroid Build Coastguard Worker PreloadDecoder::PreloadDecoder(const uint8_t* huffman_tree,
176*6777b538SAndroid Build Coastguard Worker                                size_t huffman_tree_size,
177*6777b538SAndroid Build Coastguard Worker                                const uint8_t* trie,
178*6777b538SAndroid Build Coastguard Worker                                size_t trie_bits,
179*6777b538SAndroid Build Coastguard Worker                                size_t trie_root_position)
180*6777b538SAndroid Build Coastguard Worker     : huffman_decoder_(huffman_tree, huffman_tree_size),
181*6777b538SAndroid Build Coastguard Worker       bit_reader_(trie, trie_bits),
182*6777b538SAndroid Build Coastguard Worker       trie_root_position_(trie_root_position) {}
183*6777b538SAndroid Build Coastguard Worker 
184*6777b538SAndroid Build Coastguard Worker PreloadDecoder::~PreloadDecoder() = default;
185*6777b538SAndroid Build Coastguard Worker 
Decode(const std::string & search,bool * out_found)186*6777b538SAndroid Build Coastguard Worker bool PreloadDecoder::Decode(const std::string& search, bool* out_found) {
187*6777b538SAndroid Build Coastguard Worker   size_t bit_offset = trie_root_position_;
188*6777b538SAndroid Build Coastguard Worker   *out_found = false;
189*6777b538SAndroid Build Coastguard Worker 
190*6777b538SAndroid Build Coastguard Worker   // current_search_offset contains one more than the index of the current
191*6777b538SAndroid Build Coastguard Worker   // character in the search keyword that is being considered. It's one greater
192*6777b538SAndroid Build Coastguard Worker   // so that we can represent the position just before the beginning (with
193*6777b538SAndroid Build Coastguard Worker   // zero).
194*6777b538SAndroid Build Coastguard Worker   size_t current_search_offset = search.size();
195*6777b538SAndroid Build Coastguard Worker 
196*6777b538SAndroid Build Coastguard Worker   for (;;) {
197*6777b538SAndroid Build Coastguard Worker     // Seek to the desired location.
198*6777b538SAndroid Build Coastguard Worker     if (!bit_reader_.Seek(bit_offset)) {
199*6777b538SAndroid Build Coastguard Worker       return false;
200*6777b538SAndroid Build Coastguard Worker     }
201*6777b538SAndroid Build Coastguard Worker 
202*6777b538SAndroid Build Coastguard Worker     // Decode the length of the common prefix.
203*6777b538SAndroid Build Coastguard Worker     size_t prefix_length;
204*6777b538SAndroid Build Coastguard Worker     if (!bit_reader_.DecodeSize(&prefix_length)) {
205*6777b538SAndroid Build Coastguard Worker       return false;
206*6777b538SAndroid Build Coastguard Worker     }
207*6777b538SAndroid Build Coastguard Worker 
208*6777b538SAndroid Build Coastguard Worker     // Match each character in the prefix.
209*6777b538SAndroid Build Coastguard Worker     for (size_t i = 0; i < prefix_length; ++i) {
210*6777b538SAndroid Build Coastguard Worker       if (current_search_offset == 0) {
211*6777b538SAndroid Build Coastguard Worker         // We can't match the terminator with a prefix string.
212*6777b538SAndroid Build Coastguard Worker         return true;
213*6777b538SAndroid Build Coastguard Worker       }
214*6777b538SAndroid Build Coastguard Worker 
215*6777b538SAndroid Build Coastguard Worker       char c;
216*6777b538SAndroid Build Coastguard Worker       if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
217*6777b538SAndroid Build Coastguard Worker         return false;
218*6777b538SAndroid Build Coastguard Worker       }
219*6777b538SAndroid Build Coastguard Worker       if (search[current_search_offset - 1] != c) {
220*6777b538SAndroid Build Coastguard Worker         return true;
221*6777b538SAndroid Build Coastguard Worker       }
222*6777b538SAndroid Build Coastguard Worker       current_search_offset--;
223*6777b538SAndroid Build Coastguard Worker     }
224*6777b538SAndroid Build Coastguard Worker 
225*6777b538SAndroid Build Coastguard Worker     bool is_first_offset = true;
226*6777b538SAndroid Build Coastguard Worker     size_t current_offset = 0;
227*6777b538SAndroid Build Coastguard Worker 
228*6777b538SAndroid Build Coastguard Worker     // Next is the dispatch table.
229*6777b538SAndroid Build Coastguard Worker     for (;;) {
230*6777b538SAndroid Build Coastguard Worker       char c;
231*6777b538SAndroid Build Coastguard Worker       if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
232*6777b538SAndroid Build Coastguard Worker         return false;
233*6777b538SAndroid Build Coastguard Worker       }
234*6777b538SAndroid Build Coastguard Worker       if (c == kEndOfTable) {
235*6777b538SAndroid Build Coastguard Worker         // No exact match.
236*6777b538SAndroid Build Coastguard Worker         return true;
237*6777b538SAndroid Build Coastguard Worker       }
238*6777b538SAndroid Build Coastguard Worker 
239*6777b538SAndroid Build Coastguard Worker       if (c == kEndOfString) {
240*6777b538SAndroid Build Coastguard Worker         if (!ReadEntry(&bit_reader_, search, current_search_offset,
241*6777b538SAndroid Build Coastguard Worker                        out_found)) {
242*6777b538SAndroid Build Coastguard Worker           return false;
243*6777b538SAndroid Build Coastguard Worker         }
244*6777b538SAndroid Build Coastguard Worker         if (current_search_offset == 0) {
245*6777b538SAndroid Build Coastguard Worker           CHECK(*out_found);
246*6777b538SAndroid Build Coastguard Worker           return true;
247*6777b538SAndroid Build Coastguard Worker         }
248*6777b538SAndroid Build Coastguard Worker         continue;
249*6777b538SAndroid Build Coastguard Worker       }
250*6777b538SAndroid Build Coastguard Worker 
251*6777b538SAndroid Build Coastguard Worker       // The entries in a dispatch table are in order thus we can tell if there
252*6777b538SAndroid Build Coastguard Worker       // will be no match if the current character past the one that we want.
253*6777b538SAndroid Build Coastguard Worker       if (current_search_offset == 0 || search[current_search_offset - 1] < c) {
254*6777b538SAndroid Build Coastguard Worker         return true;
255*6777b538SAndroid Build Coastguard Worker       }
256*6777b538SAndroid Build Coastguard Worker 
257*6777b538SAndroid Build Coastguard Worker       if (is_first_offset) {
258*6777b538SAndroid Build Coastguard Worker         // The first offset is backwards from the current position.
259*6777b538SAndroid Build Coastguard Worker         uint32_t jump_delta_bits;
260*6777b538SAndroid Build Coastguard Worker         uint32_t jump_delta;
261*6777b538SAndroid Build Coastguard Worker         if (!bit_reader_.Read(5, &jump_delta_bits) ||
262*6777b538SAndroid Build Coastguard Worker             !bit_reader_.Read(jump_delta_bits, &jump_delta)) {
263*6777b538SAndroid Build Coastguard Worker           return false;
264*6777b538SAndroid Build Coastguard Worker         }
265*6777b538SAndroid Build Coastguard Worker 
266*6777b538SAndroid Build Coastguard Worker         if (bit_offset < jump_delta) {
267*6777b538SAndroid Build Coastguard Worker           return false;
268*6777b538SAndroid Build Coastguard Worker         }
269*6777b538SAndroid Build Coastguard Worker 
270*6777b538SAndroid Build Coastguard Worker         current_offset = bit_offset - jump_delta;
271*6777b538SAndroid Build Coastguard Worker         is_first_offset = false;
272*6777b538SAndroid Build Coastguard Worker       } else {
273*6777b538SAndroid Build Coastguard Worker         // Subsequent offsets are forward from the target of the first offset.
274*6777b538SAndroid Build Coastguard Worker         uint32_t is_long_jump;
275*6777b538SAndroid Build Coastguard Worker         if (!bit_reader_.Read(1, &is_long_jump)) {
276*6777b538SAndroid Build Coastguard Worker           return false;
277*6777b538SAndroid Build Coastguard Worker         }
278*6777b538SAndroid Build Coastguard Worker 
279*6777b538SAndroid Build Coastguard Worker         uint32_t jump_delta;
280*6777b538SAndroid Build Coastguard Worker         if (!is_long_jump) {
281*6777b538SAndroid Build Coastguard Worker           if (!bit_reader_.Read(7, &jump_delta)) {
282*6777b538SAndroid Build Coastguard Worker             return false;
283*6777b538SAndroid Build Coastguard Worker           }
284*6777b538SAndroid Build Coastguard Worker         } else {
285*6777b538SAndroid Build Coastguard Worker           uint32_t jump_delta_bits;
286*6777b538SAndroid Build Coastguard Worker           if (!bit_reader_.Read(4, &jump_delta_bits) ||
287*6777b538SAndroid Build Coastguard Worker               !bit_reader_.Read(jump_delta_bits + 8, &jump_delta)) {
288*6777b538SAndroid Build Coastguard Worker             return false;
289*6777b538SAndroid Build Coastguard Worker           }
290*6777b538SAndroid Build Coastguard Worker         }
291*6777b538SAndroid Build Coastguard Worker 
292*6777b538SAndroid Build Coastguard Worker         current_offset += jump_delta;
293*6777b538SAndroid Build Coastguard Worker         if (current_offset >= bit_offset) {
294*6777b538SAndroid Build Coastguard Worker           return false;
295*6777b538SAndroid Build Coastguard Worker         }
296*6777b538SAndroid Build Coastguard Worker       }
297*6777b538SAndroid Build Coastguard Worker 
298*6777b538SAndroid Build Coastguard Worker       DCHECK_LT(0u, current_search_offset);
299*6777b538SAndroid Build Coastguard Worker       if (search[current_search_offset - 1] == c) {
300*6777b538SAndroid Build Coastguard Worker         bit_offset = current_offset;
301*6777b538SAndroid Build Coastguard Worker         current_search_offset--;
302*6777b538SAndroid Build Coastguard Worker         break;
303*6777b538SAndroid Build Coastguard Worker       }
304*6777b538SAndroid Build Coastguard Worker     }
305*6777b538SAndroid Build Coastguard Worker   }
306*6777b538SAndroid Build Coastguard Worker   NOTREACHED();
307*6777b538SAndroid Build Coastguard Worker }
308*6777b538SAndroid Build Coastguard Worker 
309*6777b538SAndroid Build Coastguard Worker }  // namespace net::extras
310