1 // Copyright 2016 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_ 6 #define NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_ 7 8 #include <stdint.h> 9 10 #include <map> 11 #include <memory> 12 #include <vector> 13 14 namespace net::huffman_trie { 15 16 namespace { 17 class HuffmanNode; 18 } // namespace 19 20 struct HuffmanRepresentation { 21 uint32_t bits; 22 uint32_t number_of_bits; 23 }; 24 25 // A HuffmanRepresentationTable maps the original characters to their Huffman 26 // representation. The Huffman representation consists of the number of bits 27 // needed to represent the character and the actual bits. 28 using HuffmanRepresentationTable = std::map<uint8_t, HuffmanRepresentation>; 29 using HuffmanRepresentationPair = std::pair<uint8_t, HuffmanRepresentation>; 30 31 // This class tracks the number of times each character is used and calculates 32 // a space efficient way to represent all tracked characters by constructing a 33 // Huffman tree based on the number of times each character is seen. 34 class HuffmanBuilder { 35 public: 36 HuffmanBuilder(); 37 ~HuffmanBuilder(); 38 39 // Will increase the count for |character| by one, indicating it has been 40 // used. |character| must be in the range 0-127. 41 void RecordUsage(uint8_t character); 42 43 // Returns a HuffmanRepresentationTable based on the usage data collected 44 // through RecordUsage(). 45 HuffmanRepresentationTable ToTable(); 46 47 // Outputs the Huffman representation as a vector of uint8_t's in a format 48 // Chromium can use to reconstruct the tree. 49 // 50 // The nodes of the tree are pairs of uint8s. The last node in the array is 51 // the root of the tree. Each pair is two uint8_t values, the first is "left" 52 // and the second is "right". If a uint8_t value has the MSB set then it 53 // represents a literal leaf value. Otherwise it's a pointer to the n'th 54 // element of the array. 55 std::vector<uint8_t> ToVector(); 56 57 private: 58 // Determines the Huffman representation of the characters under |node| and 59 // inserts them into |*table|. |bits| and |number_of_bits| are used as a 60 // prefix. 61 void TreeToTable(HuffmanNode* node, 62 uint32_t bits, 63 uint32_t number_of_bits, 64 HuffmanRepresentationTable* table); 65 66 // Converts the tree under |*node| into a byte representation in |*vector|. 67 // See ToVector() for more information on the format. 68 uint32_t WriteToVector(HuffmanNode* node, std::vector<uint8_t>* vector); 69 70 // Constructs a Huffman tree based on |counts_|. Appends additional nodes to 71 // the tree until it contains at least 2 leafs. 72 std::unique_ptr<HuffmanNode> BuildTree(); 73 74 // Holds usage information for the tracked characters. Maps the character to 75 // the number of times its usage has been recorded through RecordUsage(). 76 std::map<uint8_t, uint32_t> counts_; 77 }; 78 79 } // namespace net::huffman_trie 80 81 #endif // NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_ 82