xref: /aosp_15_r20/external/cronet/net/tools/huffman_trie/huffman/huffman_builder.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_
6 #define NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_
7 
8 #include <stdint.h>
9 
10 #include <map>
11 #include <memory>
12 #include <vector>
13 
14 namespace net::huffman_trie {
15 
16 namespace {
17 class HuffmanNode;
18 }  // namespace
19 
20 struct HuffmanRepresentation {
21   uint32_t bits;
22   uint32_t number_of_bits;
23 };
24 
25 // A HuffmanRepresentationTable maps the original characters to their Huffman
26 // representation. The Huffman representation consists of the number of bits
27 // needed to represent the character and the actual bits.
28 using HuffmanRepresentationTable = std::map<uint8_t, HuffmanRepresentation>;
29 using HuffmanRepresentationPair = std::pair<uint8_t, HuffmanRepresentation>;
30 
31 // This class tracks the number of times each character is used and calculates
32 // a space efficient way to represent all tracked characters by constructing a
33 // Huffman tree based on the number of times each character is seen.
34 class HuffmanBuilder {
35  public:
36   HuffmanBuilder();
37   ~HuffmanBuilder();
38 
39   // Will increase the count for |character| by one, indicating it has been
40   // used. |character| must be in the range 0-127.
41   void RecordUsage(uint8_t character);
42 
43   // Returns a HuffmanRepresentationTable based on the usage data collected
44   // through RecordUsage().
45   HuffmanRepresentationTable ToTable();
46 
47   // Outputs the Huffman representation as a vector of uint8_t's in a format
48   // Chromium can use to reconstruct the tree.
49   //
50   // The nodes of the tree are pairs of uint8s. The last node in the array is
51   // the root of the tree. Each pair is two uint8_t values, the first is "left"
52   // and the second is "right". If a uint8_t value has the MSB set then it
53   // represents a literal leaf value. Otherwise it's a pointer to the n'th
54   // element of the array.
55   std::vector<uint8_t> ToVector();
56 
57  private:
58   // Determines the Huffman representation of the characters under |node| and
59   // inserts them into |*table|. |bits| and |number_of_bits| are used as a
60   // prefix.
61   void TreeToTable(HuffmanNode* node,
62                    uint32_t bits,
63                    uint32_t number_of_bits,
64                    HuffmanRepresentationTable* table);
65 
66   // Converts the tree under |*node| into a byte representation in |*vector|.
67   // See ToVector() for more information on the format.
68   uint32_t WriteToVector(HuffmanNode* node, std::vector<uint8_t>* vector);
69 
70   // Constructs a Huffman tree based on |counts_|. Appends additional nodes to
71   // the tree until it contains at least 2 leafs.
72   std::unique_ptr<HuffmanNode> BuildTree();
73 
74   // Holds usage information for the tracked characters. Maps the character to
75   // the number of times its usage has been recorded through RecordUsage().
76   std::map<uint8_t, uint32_t> counts_;
77 };
78 
79 }  // namespace net::huffman_trie
80 
81 #endif  // NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_
82