1*07fb1d06SElliott Hughes // Copyright 2017 The ChromiumOS Authors 2*07fb1d06SElliott Hughes // Use of this source code is governed by a BSD-style license that can be 3*07fb1d06SElliott Hughes // found in the LICENSE file. 4*07fb1d06SElliott Hughes 5*07fb1d06SElliott Hughes #ifndef SRC_HUFFMAN_TABLE_H_ 6*07fb1d06SElliott Hughes #define SRC_HUFFMAN_TABLE_H_ 7*07fb1d06SElliott Hughes 8*07fb1d06SElliott Hughes #include <cstddef> 9*07fb1d06SElliott Hughes #include <cstdint> 10*07fb1d06SElliott Hughes #include <string> 11*07fb1d06SElliott Hughes #include <vector> 12*07fb1d06SElliott Hughes 13*07fb1d06SElliott Hughes #include "puffin/src/bit_reader.h" 14*07fb1d06SElliott Hughes #include "puffin/src/bit_writer.h" 15*07fb1d06SElliott Hughes #include "puffin/src/include/puffin/common.h" 16*07fb1d06SElliott Hughes #include "puffin/src/logging.h" 17*07fb1d06SElliott Hughes 18*07fb1d06SElliott Hughes namespace puffin { 19*07fb1d06SElliott Hughes 20*07fb1d06SElliott Hughes // Maximum Huffman code length based on RFC1951. 21*07fb1d06SElliott Hughes constexpr size_t kMaxHuffmanBits = 15; 22*07fb1d06SElliott Hughes 23*07fb1d06SElliott Hughes // Permutations of input Huffman code lengths (used only to read 24*07fb1d06SElliott Hughes // |dynamic_code_lens_|). 25*07fb1d06SElliott Hughes extern const uint8_t kPermutations[]; 26*07fb1d06SElliott Hughes 27*07fb1d06SElliott Hughes // The bases of each alphabet which is added to the integer value of extra 28*07fb1d06SElliott Hughes // bits that comes after the Huffman code in the input to create the given 29*07fb1d06SElliott Hughes // length value. The last element is a guard. 30*07fb1d06SElliott Hughes extern const uint16_t kLengthBases[]; 31*07fb1d06SElliott Hughes 32*07fb1d06SElliott Hughes // Number of extra bits that comes after the associating Huffman code. 33*07fb1d06SElliott Hughes extern const uint8_t kLengthExtraBits[]; 34*07fb1d06SElliott Hughes 35*07fb1d06SElliott Hughes // same as |kLengthBases| except for the the distances instead of lengths. 36*07fb1d06SElliott Hughes // The last element is a guard. 37*07fb1d06SElliott Hughes extern const uint16_t kDistanceBases[]; 38*07fb1d06SElliott Hughes 39*07fb1d06SElliott Hughes // Same as |kLengthExtraBits| except for distances instead of lengths. 40*07fb1d06SElliott Hughes extern const uint8_t kDistanceExtraBits[]; 41*07fb1d06SElliott Hughes 42*07fb1d06SElliott Hughes class HuffmanTable { 43*07fb1d06SElliott Hughes public: 44*07fb1d06SElliott Hughes HuffmanTable(); 45*07fb1d06SElliott Hughes virtual ~HuffmanTable() = default; 46*07fb1d06SElliott Hughes 47*07fb1d06SElliott Hughes // Checks the lengths of Huffman length arrays for correctness 48*07fb1d06SElliott Hughes // 49*07fb1d06SElliott Hughes // |num_lit_len| IN The number of literal/lengths code lengths 50*07fb1d06SElliott Hughes // |num_distance| IN The number of distance code lengths 51*07fb1d06SElliott Hughes // |num_codes| IN The number of code lengths for reading Huffman table. CheckHuffmanArrayLengths(size_t num_lit_len,size_t num_distance,size_t num_codes)52*07fb1d06SElliott Hughes inline bool CheckHuffmanArrayLengths(size_t num_lit_len, 53*07fb1d06SElliott Hughes size_t num_distance, 54*07fb1d06SElliott Hughes size_t num_codes) { 55*07fb1d06SElliott Hughes if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) { 56*07fb1d06SElliott Hughes LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: " 57*07fb1d06SElliott Hughes << "num_lit_len(" << num_lit_len << ") " 58*07fb1d06SElliott Hughes << "num_distance(" << num_distance << ") " 59*07fb1d06SElliott Hughes << "num_codes(" << num_codes << ")"; 60*07fb1d06SElliott Hughes return false; 61*07fb1d06SElliott Hughes } 62*07fb1d06SElliott Hughes return true; 63*07fb1d06SElliott Hughes } 64*07fb1d06SElliott Hughes 65*07fb1d06SElliott Hughes // Returns the maximum number of bits used in the current literal/length 66*07fb1d06SElliott Hughes // Huffman codes. LitLenMaxBits()67*07fb1d06SElliott Hughes inline size_t LitLenMaxBits() { return lit_len_max_bits_; } 68*07fb1d06SElliott Hughes 69*07fb1d06SElliott Hughes // Returns the maximum number of bits used in the current distance Huffman 70*07fb1d06SElliott Hughes // codes. DistanceMaxBits()71*07fb1d06SElliott Hughes inline size_t DistanceMaxBits() { return distance_max_bits_; } 72*07fb1d06SElliott Hughes 73*07fb1d06SElliott Hughes // Returns the alphabet associated with the set of input bits for the code 74*07fb1d06SElliott Hughes // length array. 75*07fb1d06SElliott Hughes // 76*07fb1d06SElliott Hughes // |bits| IN The input Huffman bits read from the deflate stream. 77*07fb1d06SElliott Hughes // |alphabet| OUT The alphabet associated with the given |bits|. 78*07fb1d06SElliott Hughes // |nbits| OUT The number of bits in the Huffman code of alphabet. 79*07fb1d06SElliott Hughes // Returns true if there is an alphabet associated with |bits|. CodeAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)80*07fb1d06SElliott Hughes inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { 81*07fb1d06SElliott Hughes auto hc = code_hcodes_[bits]; 82*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(hc & 0x8000); 83*07fb1d06SElliott Hughes *alphabet = hc & 0x7FFF; 84*07fb1d06SElliott Hughes *nbits = code_lens_[*alphabet]; 85*07fb1d06SElliott Hughes return true; 86*07fb1d06SElliott Hughes } 87*07fb1d06SElliott Hughes 88*07fb1d06SElliott Hughes // Returns the alphabet associated with the set of input bits for the 89*07fb1d06SElliott Hughes // literal/length code length array. 90*07fb1d06SElliott Hughes // 91*07fb1d06SElliott Hughes // |bits| IN The input Huffman bits read from the deflate stream. 92*07fb1d06SElliott Hughes // |alphabet| OUT The alphabet associated with the given |bits|. 93*07fb1d06SElliott Hughes // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. 94*07fb1d06SElliott Hughes // Returns true if there is an alphabet associated with |bits|. LitLenAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)95*07fb1d06SElliott Hughes inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) { 96*07fb1d06SElliott Hughes auto hc = lit_len_hcodes_[bits]; 97*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(hc & 0x8000); 98*07fb1d06SElliott Hughes *alphabet = hc & 0x7FFF; 99*07fb1d06SElliott Hughes *nbits = lit_len_lens_[*alphabet]; 100*07fb1d06SElliott Hughes return true; 101*07fb1d06SElliott Hughes } 102*07fb1d06SElliott Hughes 103*07fb1d06SElliott Hughes // Returns the alphabet associated with the set of input bits for the 104*07fb1d06SElliott Hughes // distance code length array. 105*07fb1d06SElliott Hughes // 106*07fb1d06SElliott Hughes // |bits| IN The input Huffman bits read from the deflate stream. 107*07fb1d06SElliott Hughes // |alphabet| OUT The alphabet associated with the given |bits|. 108*07fb1d06SElliott Hughes // |nbits| OUT The number of bits in the Huffman code of the |alphabet|. 109*07fb1d06SElliott Hughes // Returns true if there is an alphabet associated with |bits|. DistanceAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)110*07fb1d06SElliott Hughes inline bool DistanceAlphabet(uint32_t bits, 111*07fb1d06SElliott Hughes uint16_t* alphabet, 112*07fb1d06SElliott Hughes size_t* nbits) { 113*07fb1d06SElliott Hughes auto hc = distance_hcodes_[bits]; 114*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(hc & 0x8000); 115*07fb1d06SElliott Hughes *alphabet = hc & 0x7FFF; 116*07fb1d06SElliott Hughes *nbits = distance_lens_[*alphabet]; 117*07fb1d06SElliott Hughes return true; 118*07fb1d06SElliott Hughes } 119*07fb1d06SElliott Hughes 120*07fb1d06SElliott Hughes // Returns the Huffman code of a give alphabet for Huffman table codes. 121*07fb1d06SElliott Hughes // 122*07fb1d06SElliott Hughes // |alphabet| IN The alphabet. 123*07fb1d06SElliott Hughes // |huffman| OUT The Huffman code for |alphabet|. 124*07fb1d06SElliott Hughes // |nbits| OUT The maximum number of bits in the Huffman code of the 125*07fb1d06SElliott Hughes // |alphabet|. CodeHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)126*07fb1d06SElliott Hughes inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) { 127*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(alphabet < code_lens_.size()); 128*07fb1d06SElliott Hughes *huffman = code_rcodes_[alphabet]; 129*07fb1d06SElliott Hughes *nbits = code_lens_[alphabet]; 130*07fb1d06SElliott Hughes return true; 131*07fb1d06SElliott Hughes } 132*07fb1d06SElliott Hughes 133*07fb1d06SElliott Hughes // Returns the Huffman code of a give alphabet for literal/length codes. 134*07fb1d06SElliott Hughes // 135*07fb1d06SElliott Hughes // |alphabet| IN The alphabet. 136*07fb1d06SElliott Hughes // |huffman| OUT The Huffman code for |alphabet|. 137*07fb1d06SElliott Hughes // |nbits| OUT The maximum number of bits in the Huffman code of the 138*07fb1d06SElliott Hughes // |alphabet|. LitLenHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)139*07fb1d06SElliott Hughes inline bool LitLenHuffman(uint16_t alphabet, 140*07fb1d06SElliott Hughes uint16_t* huffman, 141*07fb1d06SElliott Hughes size_t* nbits) { 142*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size()); 143*07fb1d06SElliott Hughes *huffman = lit_len_rcodes_[alphabet]; 144*07fb1d06SElliott Hughes *nbits = lit_len_lens_[alphabet]; 145*07fb1d06SElliott Hughes return true; 146*07fb1d06SElliott Hughes } 147*07fb1d06SElliott Hughes EndOfBlockBitLength(size_t * nbits)148*07fb1d06SElliott Hughes inline bool EndOfBlockBitLength(size_t* nbits) { 149*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size()); 150*07fb1d06SElliott Hughes *nbits = lit_len_lens_[256]; 151*07fb1d06SElliott Hughes return true; 152*07fb1d06SElliott Hughes } 153*07fb1d06SElliott Hughes 154*07fb1d06SElliott Hughes // Returns the Huffman code of a give alphabet for distance codes. 155*07fb1d06SElliott Hughes // 156*07fb1d06SElliott Hughes // |alphabet| IN The alphabet. 157*07fb1d06SElliott Hughes // |huffman| OUT The Huffman code for |alphabet|. 158*07fb1d06SElliott Hughes // |nbits| OUT The maximum number of bits in the Huffman code of the 159*07fb1d06SElliott Hughes // |alphabet|. DistanceHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)160*07fb1d06SElliott Hughes inline bool DistanceHuffman(uint16_t alphabet, 161*07fb1d06SElliott Hughes uint16_t* huffman, 162*07fb1d06SElliott Hughes size_t* nbits) { 163*07fb1d06SElliott Hughes TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size()); 164*07fb1d06SElliott Hughes *huffman = distance_rcodes_[alphabet]; 165*07fb1d06SElliott Hughes *nbits = distance_lens_[alphabet]; 166*07fb1d06SElliott Hughes return true; 167*07fb1d06SElliott Hughes } 168*07fb1d06SElliott Hughes 169*07fb1d06SElliott Hughes // This populates the object with fixed huffman table parameters. 170*07fb1d06SElliott Hughes // TODO(ahassani): Revamp the use of this function to be initiliazed once in 171*07fb1d06SElliott Hughes // the lifetime of the program and only one instance needed. 172*07fb1d06SElliott Hughes bool BuildFixedHuffmanTable(); 173*07fb1d06SElliott Hughes 174*07fb1d06SElliott Hughes // This functions first reads the Huffman code length arrays from the input 175*07fb1d06SElliott Hughes // deflate stream, then builds both literal/length and distance Huffman 176*07fb1d06SElliott Hughes // code arrays. It also writes the Huffman table into the puffed stream. 177*07fb1d06SElliott Hughes // 178*07fb1d06SElliott Hughes // |br| IN The |BitReaderInterface| reading the deflate stream. 179*07fb1d06SElliott Hughes // |buffer| OUT The object to write the Huffman table. 180*07fb1d06SElliott Hughes // |length| IN/OUT The length available in the |buffer| and in return it 181*07fb1d06SElliott Hughes // will be the length of Huffman table data written into 182*07fb1d06SElliott Hughes // the |buffer|. 183*07fb1d06SElliott Hughes bool BuildDynamicHuffmanTable(BitReaderInterface* br, 184*07fb1d06SElliott Hughes uint8_t* buffer, 185*07fb1d06SElliott Hughes size_t* length); 186*07fb1d06SElliott Hughes 187*07fb1d06SElliott Hughes // This functions first reads the Huffman code length arrays from the input 188*07fb1d06SElliott Hughes // puffed |buffer|, then builds both literal/length and distance Huffman code 189*07fb1d06SElliott Hughes // arrays. It also writes the coded Huffman table arrays into the deflate 190*07fb1d06SElliott Hughes // stream. 191*07fb1d06SElliott Hughes // 192*07fb1d06SElliott Hughes // |buffer| IN The array to read the Huffman table from. 193*07fb1d06SElliott Hughes // |length| IN The length available in the |buffer|. 194*07fb1d06SElliott Hughes // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate 195*07fb1d06SElliott Hughes // stream. 196*07fb1d06SElliott Hughes bool BuildDynamicHuffmanTable(const uint8_t* buffer, 197*07fb1d06SElliott Hughes size_t length, 198*07fb1d06SElliott Hughes BitWriterInterface* bw); 199*07fb1d06SElliott Hughes 200*07fb1d06SElliott Hughes protected: 201*07fb1d06SElliott Hughes // Initializes the Huffman codes from an array of lengths. 202*07fb1d06SElliott Hughes // 203*07fb1d06SElliott Hughes // |lens| IN The input array of code lengths. 204*07fb1d06SElliott Hughes // |max_bits| OUT The maximum number of bits used for the Huffman codes. 205*07fb1d06SElliott Hughes bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits); 206*07fb1d06SElliott Hughes 207*07fb1d06SElliott Hughes // Creates the Huffman code to alphabet array. 208*07fb1d06SElliott Hughes // |lens| IN The input array of code lengths. 209*07fb1d06SElliott Hughes // |hcodes| OUT The Huffman to alphabet array. 210*07fb1d06SElliott Hughes // |max_bits| OUT The maximum number of bits used for the Huffman codes. 211*07fb1d06SElliott Hughes bool BuildHuffmanCodes(const Buffer& lens, 212*07fb1d06SElliott Hughes std::vector<uint16_t>* hcodes, 213*07fb1d06SElliott Hughes size_t* max_bits); 214*07fb1d06SElliott Hughes 215*07fb1d06SElliott Hughes // Creates the alphabet to Huffman code array. 216*07fb1d06SElliott Hughes // |lens| IN The input array of code lengths. 217*07fb1d06SElliott Hughes // |rcodes| OUT The Huffman to Huffman array. 218*07fb1d06SElliott Hughes // |max_bits| OUT The maximum number of bits used for the Huffman codes. 219*07fb1d06SElliott Hughes bool BuildHuffmanReverseCodes(const Buffer& lens, 220*07fb1d06SElliott Hughes std::vector<uint16_t>* rcodes, 221*07fb1d06SElliott Hughes size_t* max_bits); 222*07fb1d06SElliott Hughes 223*07fb1d06SElliott Hughes // Reads a specific Huffman code length array from input. At the same time 224*07fb1d06SElliott Hughes // writes the array into the puffed stream. The Huffman code length array is 225*07fb1d06SElliott Hughes // either the literal/lengths or distance codes. 226*07fb1d06SElliott Hughes // 227*07fb1d06SElliott Hughes // |br| IN The |BitReaderInterface| for reading the deflate 228*07fb1d06SElliott Hughes // stream. 229*07fb1d06SElliott Hughes // |buffer| OUT The array to write the Huffman table. 230*07fb1d06SElliott Hughes // |length| IN/OUT The length available in the |buffer| and in return it 231*07fb1d06SElliott Hughes // will be the length of data written into the |buffer|. 232*07fb1d06SElliott Hughes // |max_bits| IN The maximum number of bits in the Huffman codes. 233*07fb1d06SElliott Hughes // |num_codes| IN The size of the Huffman code length array in the input. 234*07fb1d06SElliott Hughes // |lens| OUT The resulting Huffman code length array. 235*07fb1d06SElliott Hughes bool BuildHuffmanCodeLengths(BitReaderInterface* br, 236*07fb1d06SElliott Hughes uint8_t* buffer, 237*07fb1d06SElliott Hughes size_t* length, 238*07fb1d06SElliott Hughes size_t max_bits, 239*07fb1d06SElliott Hughes size_t num_codes, 240*07fb1d06SElliott Hughes Buffer* lens); 241*07fb1d06SElliott Hughes 242*07fb1d06SElliott Hughes // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and 243*07fb1d06SElliott Hughes // writing into deflate stream. 244*07fb1d06SElliott Hughes // 245*07fb1d06SElliott Hughes // |buffer| IN The array to read the Huffman table from. 246*07fb1d06SElliott Hughes // |length| IN The length available in the |buffer|. 247*07fb1d06SElliott Hughes // |bw| IN/OUT The |BitWriterInterface| for writing into the deflate 248*07fb1d06SElliott Hughes // stream. 249*07fb1d06SElliott Hughes // |num_codes| IN Number of Huffman code lengths to read from the 250*07fb1d06SElliott Hughes // |buffer|. 251*07fb1d06SElliott Hughes // |lens| OUT The Huffman code lengths array. 252*07fb1d06SElliott Hughes bool BuildHuffmanCodeLengths(const uint8_t* buffer, 253*07fb1d06SElliott Hughes size_t* length, 254*07fb1d06SElliott Hughes BitWriterInterface* bw, 255*07fb1d06SElliott Hughes size_t num_codes, 256*07fb1d06SElliott Hughes Buffer* lens); 257*07fb1d06SElliott Hughes 258*07fb1d06SElliott Hughes private: 259*07fb1d06SElliott Hughes // A utility struct used to create Huffman codes. 260*07fb1d06SElliott Hughes struct CodeIndexPair { 261*07fb1d06SElliott Hughes uint16_t code; // The Huffman code 262*07fb1d06SElliott Hughes uint16_t index; // The alphabet 263*07fb1d06SElliott Hughes }; 264*07fb1d06SElliott Hughes // A vector with maximum size of 286 that is only uses as temporary space for 265*07fb1d06SElliott Hughes // building Huffman codes. 266*07fb1d06SElliott Hughes std::vector<CodeIndexPair> codeindexpairs_; 267*07fb1d06SElliott Hughes 268*07fb1d06SElliott Hughes // Used in building Huffman codes for literals/lengths and distances. 269*07fb1d06SElliott Hughes std::vector<uint8_t> lit_len_lens_; 270*07fb1d06SElliott Hughes std::vector<uint16_t> lit_len_hcodes_; 271*07fb1d06SElliott Hughes std::vector<uint16_t> lit_len_rcodes_; 272*07fb1d06SElliott Hughes size_t lit_len_max_bits_; 273*07fb1d06SElliott Hughes std::vector<uint8_t> distance_lens_; 274*07fb1d06SElliott Hughes std::vector<uint16_t> distance_hcodes_; 275*07fb1d06SElliott Hughes std::vector<uint16_t> distance_rcodes_; 276*07fb1d06SElliott Hughes size_t distance_max_bits_; 277*07fb1d06SElliott Hughes 278*07fb1d06SElliott Hughes // The reason for keeping a temporary buffer here is to avoid reallocing each 279*07fb1d06SElliott Hughes // time. 280*07fb1d06SElliott Hughes std::vector<uint8_t> tmp_lens_; 281*07fb1d06SElliott Hughes 282*07fb1d06SElliott Hughes // Used in building Huffman codes for reading and decoding literal/length and 283*07fb1d06SElliott Hughes // distance Huffman code length arrays. 284*07fb1d06SElliott Hughes std::vector<uint8_t> code_lens_; 285*07fb1d06SElliott Hughes std::vector<uint16_t> code_hcodes_; 286*07fb1d06SElliott Hughes std::vector<uint16_t> code_rcodes_; 287*07fb1d06SElliott Hughes size_t code_max_bits_; 288*07fb1d06SElliott Hughes 289*07fb1d06SElliott Hughes bool initialized_; 290*07fb1d06SElliott Hughes 291*07fb1d06SElliott Hughes DISALLOW_COPY_AND_ASSIGN(HuffmanTable); 292*07fb1d06SElliott Hughes }; 293*07fb1d06SElliott Hughes 294*07fb1d06SElliott Hughes // The type of a block in a deflate stream. 295*07fb1d06SElliott Hughes enum class BlockType : uint8_t { 296*07fb1d06SElliott Hughes kUncompressed = 0x00, 297*07fb1d06SElliott Hughes kFixed = 0x01, 298*07fb1d06SElliott Hughes kDynamic = 0x02, 299*07fb1d06SElliott Hughes }; 300*07fb1d06SElliott Hughes 301*07fb1d06SElliott Hughes std::string BlockTypeToString(BlockType type); 302*07fb1d06SElliott Hughes 303*07fb1d06SElliott Hughes } // namespace puffin 304*07fb1d06SElliott Hughes 305*07fb1d06SElliott Hughes #endif // SRC_HUFFMAN_TABLE_H_ 306