1*6777b538SAndroid Build Coastguard Worker // Copyright 2018 The Chromium Authors 2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be 3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file. 4*6777b538SAndroid Build Coastguard Worker 5*6777b538SAndroid Build Coastguard Worker #ifndef NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 6*6777b538SAndroid Build Coastguard Worker #define NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 7*6777b538SAndroid Build Coastguard Worker 8*6777b538SAndroid Build Coastguard Worker #include <stdint.h> 9*6777b538SAndroid Build Coastguard Worker 10*6777b538SAndroid Build Coastguard Worker #include <string> 11*6777b538SAndroid Build Coastguard Worker 12*6777b538SAndroid Build Coastguard Worker namespace net::extras { 13*6777b538SAndroid Build Coastguard Worker 14*6777b538SAndroid Build Coastguard Worker // Decodes an entry from preloaded data. 15*6777b538SAndroid Build Coastguard Worker // Clients must implement ReadEntry() method to read the specific type of data 16*6777b538SAndroid Build Coastguard Worker // they are interested in. 17*6777b538SAndroid Build Coastguard Worker class PreloadDecoder { 18*6777b538SAndroid Build Coastguard Worker public: 19*6777b538SAndroid Build Coastguard Worker // These must match the values in net/tools/huffman_trie/trie/trie_writer.h. 20*6777b538SAndroid Build Coastguard Worker enum : char { kEndOfString = 0, kEndOfTable = 127 }; 21*6777b538SAndroid Build Coastguard Worker 22*6777b538SAndroid Build Coastguard Worker // BitReader is a class that allows a bytestring to be read bit-by-bit. 23*6777b538SAndroid Build Coastguard Worker class BitReader { 24*6777b538SAndroid Build Coastguard Worker public: 25*6777b538SAndroid Build Coastguard Worker BitReader(const uint8_t* bytes, size_t num_bits); 26*6777b538SAndroid Build Coastguard Worker 27*6777b538SAndroid Build Coastguard Worker BitReader(const BitReader&) = delete; 28*6777b538SAndroid Build Coastguard Worker BitReader& operator=(const BitReader&) = delete; 29*6777b538SAndroid Build Coastguard Worker 30*6777b538SAndroid Build Coastguard Worker // Next sets |*out| to the next bit from the input. It returns false if no 31*6777b538SAndroid Build Coastguard Worker // more bits are available or true otherwise. 32*6777b538SAndroid Build Coastguard Worker bool Next(bool* out); 33*6777b538SAndroid Build Coastguard Worker 34*6777b538SAndroid Build Coastguard Worker // Read sets the |num_bits| least-significant bits of |*out| to the value of 35*6777b538SAndroid Build Coastguard Worker // the next |num_bits| bits from the input. It returns false if there are 36*6777b538SAndroid Build Coastguard Worker // insufficient bits in the input or true otherwise. 37*6777b538SAndroid Build Coastguard Worker bool Read(unsigned num_bits, uint32_t* out); 38*6777b538SAndroid Build Coastguard Worker 39*6777b538SAndroid Build Coastguard Worker // Decodes a size_t from the reader, putting the resulting value in |*out|. 40*6777b538SAndroid Build Coastguard Worker // Returns false if there are insufficient bits to read and true otherwise. 41*6777b538SAndroid Build Coastguard Worker // 42*6777b538SAndroid Build Coastguard Worker // This function's inverse is TrieBitBuffer::WriteSize. 43*6777b538SAndroid Build Coastguard Worker // 44*6777b538SAndroid Build Coastguard Worker // The encoding is a prefix code optimized for small values (less than 4). 45*6777b538SAndroid Build Coastguard Worker // It is designed for the lengths of prefixes in the HSTS Preload list trie. 46*6777b538SAndroid Build Coastguard Worker // Compared to the unary encoding that was previously used (where the number 47*6777b538SAndroid Build Coastguard Worker // of bits used is one plus the value being encoded), this uses one more bit 48*6777b538SAndroid Build Coastguard Worker // for encoding 0 and 1, and the same number of bits for encoding 2, and 49*6777b538SAndroid Build Coastguard Worker // fewer bits for encoding values greater than 2. At the time of writing, 50*6777b538SAndroid Build Coastguard Worker // 35% of the lengths encoded in the trie were 0 or 1, 11% were 2, and the 51*6777b538SAndroid Build Coastguard Worker // remaining 54% were greater than 2. 52*6777b538SAndroid Build Coastguard Worker // 53*6777b538SAndroid Build Coastguard Worker // This encoding scheme uses a variable number of bits to encode each value. 54*6777b538SAndroid Build Coastguard Worker // There are fixed values for 0, 1, 2, and 3, and then a simple rule is used 55*6777b538SAndroid Build Coastguard Worker // for 4 and greater. 0 uses 2 bits; 1 through 3 use 3 bits. The fixed 56*6777b538SAndroid Build Coastguard Worker // values are as follows: 57*6777b538SAndroid Build Coastguard Worker // 58*6777b538SAndroid Build Coastguard Worker // 0: 0b00 59*6777b538SAndroid Build Coastguard Worker // 1: 0b100 60*6777b538SAndroid Build Coastguard Worker // 2: 0b101 61*6777b538SAndroid Build Coastguard Worker // 3: 0b110 62*6777b538SAndroid Build Coastguard Worker // 63*6777b538SAndroid Build Coastguard Worker // Note that none of the fixed values are prefixed with 0b01 or 0b111. These 64*6777b538SAndroid Build Coastguard Worker // prefixes are used with a unary-like encoding for values 4 and above. 65*6777b538SAndroid Build Coastguard Worker // Zero or more 1s, followed by a 0, are appended to one of those prefixes. 66*6777b538SAndroid Build Coastguard Worker // Even values use the prefix 0b01, and odd values use the prefix 0b111. The 67*6777b538SAndroid Build Coastguard Worker // number of 1s to append is half the value (rounded down) minus 1. 68*6777b538SAndroid Build Coastguard Worker bool DecodeSize(size_t* out); 69*6777b538SAndroid Build Coastguard Worker 70*6777b538SAndroid Build Coastguard Worker // Seek sets the current offest in the input to bit number |offset|. It 71*6777b538SAndroid Build Coastguard Worker // returns true if |offset| is within the range of the input and false 72*6777b538SAndroid Build Coastguard Worker // otherwise. 73*6777b538SAndroid Build Coastguard Worker bool Seek(size_t offset); 74*6777b538SAndroid Build Coastguard Worker 75*6777b538SAndroid Build Coastguard Worker private: 76*6777b538SAndroid Build Coastguard Worker const uint8_t* const bytes_; 77*6777b538SAndroid Build Coastguard Worker const size_t num_bits_; 78*6777b538SAndroid Build Coastguard Worker const size_t num_bytes_; 79*6777b538SAndroid Build Coastguard Worker // current_byte_index_ contains the current byte offset in |bytes_|. 80*6777b538SAndroid Build Coastguard Worker size_t current_byte_index_ = 0; 81*6777b538SAndroid Build Coastguard Worker // current_byte_ contains the current byte of the input. 82*6777b538SAndroid Build Coastguard Worker uint8_t current_byte_; 83*6777b538SAndroid Build Coastguard Worker // num_bits_used_ contains the number of bits of |current_byte_| that have 84*6777b538SAndroid Build Coastguard Worker // been read. 85*6777b538SAndroid Build Coastguard Worker unsigned num_bits_used_ = 8; 86*6777b538SAndroid Build Coastguard Worker }; 87*6777b538SAndroid Build Coastguard Worker 88*6777b538SAndroid Build Coastguard Worker // HuffmanDecoder is a very simple Huffman reader. The input Huffman tree is 89*6777b538SAndroid Build Coastguard Worker // simply encoded as a series of two-byte structures. The first byte 90*6777b538SAndroid Build Coastguard Worker // determines the "0" pointer for that node and the second the "1" pointer. 91*6777b538SAndroid Build Coastguard Worker // Each byte either has the MSB set, in which case the bottom 7 bits are the 92*6777b538SAndroid Build Coastguard Worker // value for that position, or else the bottom seven bits contain the index of 93*6777b538SAndroid Build Coastguard Worker // a node. 94*6777b538SAndroid Build Coastguard Worker // 95*6777b538SAndroid Build Coastguard Worker // The tree is decoded by walking rather than a table-driven approach. 96*6777b538SAndroid Build Coastguard Worker class HuffmanDecoder { 97*6777b538SAndroid Build Coastguard Worker public: 98*6777b538SAndroid Build Coastguard Worker HuffmanDecoder(const uint8_t* tree, size_t tree_bytes); 99*6777b538SAndroid Build Coastguard Worker 100*6777b538SAndroid Build Coastguard Worker HuffmanDecoder(const HuffmanDecoder&) = delete; 101*6777b538SAndroid Build Coastguard Worker HuffmanDecoder& operator=(const HuffmanDecoder&) = delete; 102*6777b538SAndroid Build Coastguard Worker 103*6777b538SAndroid Build Coastguard Worker bool Decode(PreloadDecoder::BitReader* reader, char* out) const; 104*6777b538SAndroid Build Coastguard Worker 105*6777b538SAndroid Build Coastguard Worker private: 106*6777b538SAndroid Build Coastguard Worker const uint8_t* const tree_; 107*6777b538SAndroid Build Coastguard Worker const size_t tree_bytes_; 108*6777b538SAndroid Build Coastguard Worker }; 109*6777b538SAndroid Build Coastguard Worker 110*6777b538SAndroid Build Coastguard Worker PreloadDecoder(const uint8_t* huffman_tree, 111*6777b538SAndroid Build Coastguard Worker size_t huffman_tree_size, 112*6777b538SAndroid Build Coastguard Worker const uint8_t* trie, 113*6777b538SAndroid Build Coastguard Worker size_t trie_bits, 114*6777b538SAndroid Build Coastguard Worker size_t trie_root_position); 115*6777b538SAndroid Build Coastguard Worker 116*6777b538SAndroid Build Coastguard Worker PreloadDecoder(const PreloadDecoder&) = delete; 117*6777b538SAndroid Build Coastguard Worker PreloadDecoder& operator=(const PreloadDecoder&) = delete; 118*6777b538SAndroid Build Coastguard Worker 119*6777b538SAndroid Build Coastguard Worker virtual ~PreloadDecoder(); 120*6777b538SAndroid Build Coastguard Worker 121*6777b538SAndroid Build Coastguard Worker // Resolves search keyword given by |search| in the preloaded data. Returns 122*6777b538SAndroid Build Coastguard Worker // false on internal error and true otherwise. After a successful return, 123*6777b538SAndroid Build Coastguard Worker // |*out_found| is true iff a relevant entry has been found. In the case of 124*6777b538SAndroid Build Coastguard Worker // HSTS data, |search| is the hostname being searched. 125*6777b538SAndroid Build Coastguard Worker // 126*6777b538SAndroid Build Coastguard Worker // Although this code should be robust, it never processes attacker-controlled 127*6777b538SAndroid Build Coastguard Worker // data -- it only operates on the preloaded data built into the binary. 128*6777b538SAndroid Build Coastguard Worker // 129*6777b538SAndroid Build Coastguard Worker // The preloaded data is represented as a trie and matches |search| 130*6777b538SAndroid Build Coastguard Worker // backwards. Each node in the trie starts with a number of characters, which 131*6777b538SAndroid Build Coastguard Worker // must match exactly. After that is a dispatch table which maps the next 132*6777b538SAndroid Build Coastguard Worker // character in the search keyword to another node in the trie. 133*6777b538SAndroid Build Coastguard Worker // 134*6777b538SAndroid Build Coastguard Worker // In the dispatch table, the zero character represents the "end of string" 135*6777b538SAndroid Build Coastguard Worker // (which is the *beginning* of the search keyword since we process it 136*6777b538SAndroid Build Coastguard Worker // backwards). The value in that case is special -- rather than an offset to 137*6777b538SAndroid Build Coastguard Worker // another trie node, it contains the searched entry (for HSTS data, it 138*6777b538SAndroid Build Coastguard Worker // contains whether subdomains are included, pinsets etc.). Clients must 139*6777b538SAndroid Build Coastguard Worker // implement ReadEntry to read the entry at this location. 140*6777b538SAndroid Build Coastguard Worker // 141*6777b538SAndroid Build Coastguard Worker // Dispatch tables are always given in order, but the "end of string" (zero) 142*6777b538SAndroid Build Coastguard Worker // value always comes before an entry for '.'. 143*6777b538SAndroid Build Coastguard Worker bool Decode(const std::string& search, bool* out_found); 144*6777b538SAndroid Build Coastguard Worker 145*6777b538SAndroid Build Coastguard Worker protected: 146*6777b538SAndroid Build Coastguard Worker virtual bool ReadEntry(BitReader* reader, 147*6777b538SAndroid Build Coastguard Worker const std::string& search, 148*6777b538SAndroid Build Coastguard Worker size_t current_search_offset, 149*6777b538SAndroid Build Coastguard Worker bool* out_found) = 0; 150*6777b538SAndroid Build Coastguard Worker huffman_decoder()151*6777b538SAndroid Build Coastguard Worker const HuffmanDecoder& huffman_decoder() const { return huffman_decoder_; } 152*6777b538SAndroid Build Coastguard Worker 153*6777b538SAndroid Build Coastguard Worker private: 154*6777b538SAndroid Build Coastguard Worker HuffmanDecoder huffman_decoder_; 155*6777b538SAndroid Build Coastguard Worker BitReader bit_reader_; 156*6777b538SAndroid Build Coastguard Worker 157*6777b538SAndroid Build Coastguard Worker const size_t trie_root_position_; 158*6777b538SAndroid Build Coastguard Worker }; 159*6777b538SAndroid Build Coastguard Worker 160*6777b538SAndroid Build Coastguard Worker } // namespace net::extras 161*6777b538SAndroid Build Coastguard Worker 162*6777b538SAndroid Build Coastguard Worker #endif // NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 163