xref: /aosp_15_r20/external/cronet/net/extras/preload_data/decoder.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2018 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #ifndef NET_EXTRAS_PRELOAD_DATA_DECODER_H_
6*6777b538SAndroid Build Coastguard Worker #define NET_EXTRAS_PRELOAD_DATA_DECODER_H_
7*6777b538SAndroid Build Coastguard Worker 
8*6777b538SAndroid Build Coastguard Worker #include <stdint.h>
9*6777b538SAndroid Build Coastguard Worker 
10*6777b538SAndroid Build Coastguard Worker #include <string>
11*6777b538SAndroid Build Coastguard Worker 
12*6777b538SAndroid Build Coastguard Worker namespace net::extras {
13*6777b538SAndroid Build Coastguard Worker 
14*6777b538SAndroid Build Coastguard Worker // Decodes an entry from preloaded data.
15*6777b538SAndroid Build Coastguard Worker // Clients must implement ReadEntry() method to read the specific type of data
16*6777b538SAndroid Build Coastguard Worker // they are interested in.
17*6777b538SAndroid Build Coastguard Worker class PreloadDecoder {
18*6777b538SAndroid Build Coastguard Worker  public:
19*6777b538SAndroid Build Coastguard Worker   // These must match the values in net/tools/huffman_trie/trie/trie_writer.h.
20*6777b538SAndroid Build Coastguard Worker   enum : char { kEndOfString = 0, kEndOfTable = 127 };
21*6777b538SAndroid Build Coastguard Worker 
22*6777b538SAndroid Build Coastguard Worker   // BitReader is a class that allows a bytestring to be read bit-by-bit.
23*6777b538SAndroid Build Coastguard Worker   class BitReader {
24*6777b538SAndroid Build Coastguard Worker    public:
25*6777b538SAndroid Build Coastguard Worker     BitReader(const uint8_t* bytes, size_t num_bits);
26*6777b538SAndroid Build Coastguard Worker 
27*6777b538SAndroid Build Coastguard Worker     BitReader(const BitReader&) = delete;
28*6777b538SAndroid Build Coastguard Worker     BitReader& operator=(const BitReader&) = delete;
29*6777b538SAndroid Build Coastguard Worker 
30*6777b538SAndroid Build Coastguard Worker     // Next sets |*out| to the next bit from the input. It returns false if no
31*6777b538SAndroid Build Coastguard Worker     // more bits are available or true otherwise.
32*6777b538SAndroid Build Coastguard Worker     bool Next(bool* out);
33*6777b538SAndroid Build Coastguard Worker 
34*6777b538SAndroid Build Coastguard Worker     // Read sets the |num_bits| least-significant bits of |*out| to the value of
35*6777b538SAndroid Build Coastguard Worker     // the next |num_bits| bits from the input. It returns false if there are
36*6777b538SAndroid Build Coastguard Worker     // insufficient bits in the input or true otherwise.
37*6777b538SAndroid Build Coastguard Worker     bool Read(unsigned num_bits, uint32_t* out);
38*6777b538SAndroid Build Coastguard Worker 
39*6777b538SAndroid Build Coastguard Worker     // Decodes a size_t from the reader, putting the resulting value in |*out|.
40*6777b538SAndroid Build Coastguard Worker     // Returns false if there are insufficient bits to read and true otherwise.
41*6777b538SAndroid Build Coastguard Worker     //
42*6777b538SAndroid Build Coastguard Worker     // This function's inverse is TrieBitBuffer::WriteSize.
43*6777b538SAndroid Build Coastguard Worker     //
44*6777b538SAndroid Build Coastguard Worker     // The encoding is a prefix code optimized for small values (less than 4).
45*6777b538SAndroid Build Coastguard Worker     // It is designed for the lengths of prefixes in the HSTS Preload list trie.
46*6777b538SAndroid Build Coastguard Worker     // Compared to the unary encoding that was previously used (where the number
47*6777b538SAndroid Build Coastguard Worker     // of bits used is one plus the value being encoded), this uses one more bit
48*6777b538SAndroid Build Coastguard Worker     // for encoding 0 and 1, and the same number of bits for encoding 2, and
49*6777b538SAndroid Build Coastguard Worker     // fewer bits for encoding values greater than 2. At the time of writing,
50*6777b538SAndroid Build Coastguard Worker     // 35% of the lengths encoded in the trie were 0 or 1, 11% were 2, and the
51*6777b538SAndroid Build Coastguard Worker     // remaining 54% were greater than 2.
52*6777b538SAndroid Build Coastguard Worker     //
53*6777b538SAndroid Build Coastguard Worker     // This encoding scheme uses a variable number of bits to encode each value.
54*6777b538SAndroid Build Coastguard Worker     // There are fixed values for 0, 1, 2, and 3, and then a simple rule is used
55*6777b538SAndroid Build Coastguard Worker     // for 4 and greater. 0 uses 2 bits; 1 through 3 use 3 bits. The fixed
56*6777b538SAndroid Build Coastguard Worker     // values are as follows:
57*6777b538SAndroid Build Coastguard Worker     //
58*6777b538SAndroid Build Coastguard Worker     //   0: 0b00
59*6777b538SAndroid Build Coastguard Worker     //   1: 0b100
60*6777b538SAndroid Build Coastguard Worker     //   2: 0b101
61*6777b538SAndroid Build Coastguard Worker     //   3: 0b110
62*6777b538SAndroid Build Coastguard Worker     //
63*6777b538SAndroid Build Coastguard Worker     // Note that none of the fixed values are prefixed with 0b01 or 0b111. These
64*6777b538SAndroid Build Coastguard Worker     // prefixes are used with a unary-like encoding for values 4 and above.
65*6777b538SAndroid Build Coastguard Worker     // Zero or more 1s, followed by a 0, are appended to one of those prefixes.
66*6777b538SAndroid Build Coastguard Worker     // Even values use the prefix 0b01, and odd values use the prefix 0b111. The
67*6777b538SAndroid Build Coastguard Worker     // number of 1s to append is half the value (rounded down) minus 1.
68*6777b538SAndroid Build Coastguard Worker     bool DecodeSize(size_t* out);
69*6777b538SAndroid Build Coastguard Worker 
70*6777b538SAndroid Build Coastguard Worker     // Seek sets the current offest in the input to bit number |offset|. It
71*6777b538SAndroid Build Coastguard Worker     // returns true if |offset| is within the range of the input and false
72*6777b538SAndroid Build Coastguard Worker     // otherwise.
73*6777b538SAndroid Build Coastguard Worker     bool Seek(size_t offset);
74*6777b538SAndroid Build Coastguard Worker 
75*6777b538SAndroid Build Coastguard Worker    private:
76*6777b538SAndroid Build Coastguard Worker     const uint8_t* const bytes_;
77*6777b538SAndroid Build Coastguard Worker     const size_t num_bits_;
78*6777b538SAndroid Build Coastguard Worker     const size_t num_bytes_;
79*6777b538SAndroid Build Coastguard Worker     // current_byte_index_ contains the current byte offset in |bytes_|.
80*6777b538SAndroid Build Coastguard Worker     size_t current_byte_index_ = 0;
81*6777b538SAndroid Build Coastguard Worker     // current_byte_ contains the current byte of the input.
82*6777b538SAndroid Build Coastguard Worker     uint8_t current_byte_;
83*6777b538SAndroid Build Coastguard Worker     // num_bits_used_ contains the number of bits of |current_byte_| that have
84*6777b538SAndroid Build Coastguard Worker     // been read.
85*6777b538SAndroid Build Coastguard Worker     unsigned num_bits_used_ = 8;
86*6777b538SAndroid Build Coastguard Worker   };
87*6777b538SAndroid Build Coastguard Worker 
88*6777b538SAndroid Build Coastguard Worker   // HuffmanDecoder is a very simple Huffman reader. The input Huffman tree is
89*6777b538SAndroid Build Coastguard Worker   // simply encoded as a series of two-byte structures. The first byte
90*6777b538SAndroid Build Coastguard Worker   // determines the "0" pointer for that node and the second the "1" pointer.
91*6777b538SAndroid Build Coastguard Worker   // Each byte either has the MSB set, in which case the bottom 7 bits are the
92*6777b538SAndroid Build Coastguard Worker   // value for that position, or else the bottom seven bits contain the index of
93*6777b538SAndroid Build Coastguard Worker   // a node.
94*6777b538SAndroid Build Coastguard Worker   //
95*6777b538SAndroid Build Coastguard Worker   // The tree is decoded by walking rather than a table-driven approach.
96*6777b538SAndroid Build Coastguard Worker   class HuffmanDecoder {
97*6777b538SAndroid Build Coastguard Worker    public:
98*6777b538SAndroid Build Coastguard Worker     HuffmanDecoder(const uint8_t* tree, size_t tree_bytes);
99*6777b538SAndroid Build Coastguard Worker 
100*6777b538SAndroid Build Coastguard Worker     HuffmanDecoder(const HuffmanDecoder&) = delete;
101*6777b538SAndroid Build Coastguard Worker     HuffmanDecoder& operator=(const HuffmanDecoder&) = delete;
102*6777b538SAndroid Build Coastguard Worker 
103*6777b538SAndroid Build Coastguard Worker     bool Decode(PreloadDecoder::BitReader* reader, char* out) const;
104*6777b538SAndroid Build Coastguard Worker 
105*6777b538SAndroid Build Coastguard Worker    private:
106*6777b538SAndroid Build Coastguard Worker     const uint8_t* const tree_;
107*6777b538SAndroid Build Coastguard Worker     const size_t tree_bytes_;
108*6777b538SAndroid Build Coastguard Worker   };
109*6777b538SAndroid Build Coastguard Worker 
110*6777b538SAndroid Build Coastguard Worker   PreloadDecoder(const uint8_t* huffman_tree,
111*6777b538SAndroid Build Coastguard Worker                  size_t huffman_tree_size,
112*6777b538SAndroid Build Coastguard Worker                  const uint8_t* trie,
113*6777b538SAndroid Build Coastguard Worker                  size_t trie_bits,
114*6777b538SAndroid Build Coastguard Worker                  size_t trie_root_position);
115*6777b538SAndroid Build Coastguard Worker 
116*6777b538SAndroid Build Coastguard Worker   PreloadDecoder(const PreloadDecoder&) = delete;
117*6777b538SAndroid Build Coastguard Worker   PreloadDecoder& operator=(const PreloadDecoder&) = delete;
118*6777b538SAndroid Build Coastguard Worker 
119*6777b538SAndroid Build Coastguard Worker   virtual ~PreloadDecoder();
120*6777b538SAndroid Build Coastguard Worker 
121*6777b538SAndroid Build Coastguard Worker   // Resolves search keyword given by |search| in the preloaded data. Returns
122*6777b538SAndroid Build Coastguard Worker   // false on internal error and true otherwise. After a successful return,
123*6777b538SAndroid Build Coastguard Worker   // |*out_found| is true iff a relevant entry has been found. In the case of
124*6777b538SAndroid Build Coastguard Worker   // HSTS data, |search| is the hostname being searched.
125*6777b538SAndroid Build Coastguard Worker   //
126*6777b538SAndroid Build Coastguard Worker   // Although this code should be robust, it never processes attacker-controlled
127*6777b538SAndroid Build Coastguard Worker   // data -- it only operates on the preloaded data built into the binary.
128*6777b538SAndroid Build Coastguard Worker   //
129*6777b538SAndroid Build Coastguard Worker   // The preloaded data is represented as a trie and matches |search|
130*6777b538SAndroid Build Coastguard Worker   // backwards. Each node in the trie starts with a number of characters, which
131*6777b538SAndroid Build Coastguard Worker   // must match exactly. After that is a dispatch table which maps the next
132*6777b538SAndroid Build Coastguard Worker   // character in the search keyword to another node in the trie.
133*6777b538SAndroid Build Coastguard Worker   //
134*6777b538SAndroid Build Coastguard Worker   // In the dispatch table, the zero character represents the "end of string"
135*6777b538SAndroid Build Coastguard Worker   // (which is the *beginning* of the search keyword since we process it
136*6777b538SAndroid Build Coastguard Worker   // backwards). The value in that case is special -- rather than an offset to
137*6777b538SAndroid Build Coastguard Worker   // another trie node, it contains the searched entry (for HSTS data, it
138*6777b538SAndroid Build Coastguard Worker   // contains whether subdomains are included, pinsets etc.). Clients must
139*6777b538SAndroid Build Coastguard Worker   // implement ReadEntry to read the entry at this location.
140*6777b538SAndroid Build Coastguard Worker   //
141*6777b538SAndroid Build Coastguard Worker   // Dispatch tables are always given in order, but the "end of string" (zero)
142*6777b538SAndroid Build Coastguard Worker   // value always comes before an entry for '.'.
143*6777b538SAndroid Build Coastguard Worker   bool Decode(const std::string& search, bool* out_found);
144*6777b538SAndroid Build Coastguard Worker 
145*6777b538SAndroid Build Coastguard Worker  protected:
146*6777b538SAndroid Build Coastguard Worker   virtual bool ReadEntry(BitReader* reader,
147*6777b538SAndroid Build Coastguard Worker                          const std::string& search,
148*6777b538SAndroid Build Coastguard Worker                          size_t current_search_offset,
149*6777b538SAndroid Build Coastguard Worker                          bool* out_found) = 0;
150*6777b538SAndroid Build Coastguard Worker 
huffman_decoder()151*6777b538SAndroid Build Coastguard Worker   const HuffmanDecoder& huffman_decoder() const { return huffman_decoder_; }
152*6777b538SAndroid Build Coastguard Worker 
153*6777b538SAndroid Build Coastguard Worker  private:
154*6777b538SAndroid Build Coastguard Worker   HuffmanDecoder huffman_decoder_;
155*6777b538SAndroid Build Coastguard Worker   BitReader bit_reader_;
156*6777b538SAndroid Build Coastguard Worker 
157*6777b538SAndroid Build Coastguard Worker   const size_t trie_root_position_;
158*6777b538SAndroid Build Coastguard Worker };
159*6777b538SAndroid Build Coastguard Worker 
160*6777b538SAndroid Build Coastguard Worker }  // namespace net::extras
161*6777b538SAndroid Build Coastguard Worker 
162*6777b538SAndroid Build Coastguard Worker #endif  // NET_EXTRAS_PRELOAD_DATA_DECODER_H_
163