1*10465441SEvalZero /* inftrees.c -- generate Huffman trees for efficient decoding
2*10465441SEvalZero * Copyright (C) 1995-2005 Mark Adler
3*10465441SEvalZero * For conditions of distribution and use, see copyright notice in zlib.h
4*10465441SEvalZero */
5*10465441SEvalZero
6*10465441SEvalZero #include "zutil.h"
7*10465441SEvalZero #include "inftrees.h"
8*10465441SEvalZero
9*10465441SEvalZero #define MAXBITS 15
10*10465441SEvalZero
11*10465441SEvalZero const char inflate_copyright[] =
12*10465441SEvalZero " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
13*10465441SEvalZero /*
14*10465441SEvalZero If you use the zlib library in a product, an acknowledgment is welcome
15*10465441SEvalZero in the documentation of your product. If for some reason you cannot
16*10465441SEvalZero include such an acknowledgment, I would appreciate that you keep this
17*10465441SEvalZero copyright string in the executable of your product.
18*10465441SEvalZero */
19*10465441SEvalZero
20*10465441SEvalZero /*
21*10465441SEvalZero Build a set of tables to decode the provided canonical Huffman code.
22*10465441SEvalZero The code lengths are lens[0..codes-1]. The result starts at *table,
23*10465441SEvalZero whose indices are 0..2^bits-1. work is a writable array of at least
24*10465441SEvalZero lens shorts, which is used as a work area. type is the type of code
25*10465441SEvalZero to be generated, CODES, LENS, or DISTS. On return, zero is success,
26*10465441SEvalZero -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
27*10465441SEvalZero on return points to the next available entry's address. bits is the
28*10465441SEvalZero requested root table index bits, and on return it is the actual root
29*10465441SEvalZero table index bits. It will differ if the request is greater than the
30*10465441SEvalZero longest code or if it is less than the shortest code.
31*10465441SEvalZero */
inflate_table(type,lens,codes,table,bits,work)32*10465441SEvalZero int inflate_table(type, lens, codes, table, bits, work)
33*10465441SEvalZero codetype type;
34*10465441SEvalZero unsigned short FAR *lens;
35*10465441SEvalZero unsigned codes;
36*10465441SEvalZero code FAR * FAR *table;
37*10465441SEvalZero unsigned FAR *bits;
38*10465441SEvalZero unsigned short FAR *work;
39*10465441SEvalZero {
40*10465441SEvalZero unsigned len; /* a code's length in bits */
41*10465441SEvalZero unsigned sym; /* index of code symbols */
42*10465441SEvalZero unsigned min, max; /* minimum and maximum code lengths */
43*10465441SEvalZero unsigned root; /* number of index bits for root table */
44*10465441SEvalZero unsigned curr; /* number of index bits for current table */
45*10465441SEvalZero unsigned drop; /* code bits to drop for sub-table */
46*10465441SEvalZero int left; /* number of prefix codes available */
47*10465441SEvalZero unsigned used; /* code entries in table used */
48*10465441SEvalZero unsigned huff; /* Huffman code */
49*10465441SEvalZero unsigned incr; /* for incrementing code, index */
50*10465441SEvalZero unsigned fill; /* index for replicating entries */
51*10465441SEvalZero unsigned low; /* low bits for current root entry */
52*10465441SEvalZero unsigned mask; /* mask for low root bits */
53*10465441SEvalZero code this; /* table entry for duplication */
54*10465441SEvalZero code FAR *next; /* next available space in table */
55*10465441SEvalZero const unsigned short FAR *base; /* base value table to use */
56*10465441SEvalZero const unsigned short FAR *extra; /* extra bits table to use */
57*10465441SEvalZero int end; /* use base and extra for symbol > end */
58*10465441SEvalZero unsigned short count[MAXBITS+1]; /* number of codes of each length */
59*10465441SEvalZero unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
60*10465441SEvalZero static const unsigned short lbase[31] = { /* Length codes 257..285 base */
61*10465441SEvalZero 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
62*10465441SEvalZero 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
63*10465441SEvalZero static const unsigned short lext[31] = { /* Length codes 257..285 extra */
64*10465441SEvalZero 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
65*10465441SEvalZero 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
66*10465441SEvalZero static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
67*10465441SEvalZero 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
68*10465441SEvalZero 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
69*10465441SEvalZero 8193, 12289, 16385, 24577, 0, 0};
70*10465441SEvalZero static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
71*10465441SEvalZero 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
72*10465441SEvalZero 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
73*10465441SEvalZero 28, 28, 29, 29, 64, 64};
74*10465441SEvalZero
75*10465441SEvalZero /*
76*10465441SEvalZero Process a set of code lengths to create a canonical Huffman code. The
77*10465441SEvalZero code lengths are lens[0..codes-1]. Each length corresponds to the
78*10465441SEvalZero symbols 0..codes-1. The Huffman code is generated by first sorting the
79*10465441SEvalZero symbols by length from short to long, and retaining the symbol order
80*10465441SEvalZero for codes with equal lengths. Then the code starts with all zero bits
81*10465441SEvalZero for the first code of the shortest length, and the codes are integer
82*10465441SEvalZero increments for the same length, and zeros are appended as the length
83*10465441SEvalZero increases. For the deflate format, these bits are stored backwards
84*10465441SEvalZero from their more natural integer increment ordering, and so when the
85*10465441SEvalZero decoding tables are built in the large loop below, the integer codes
86*10465441SEvalZero are incremented backwards.
87*10465441SEvalZero
88*10465441SEvalZero This routine assumes, but does not check, that all of the entries in
89*10465441SEvalZero lens[] are in the range 0..MAXBITS. The caller must assure this.
90*10465441SEvalZero 1..MAXBITS is interpreted as that code length. zero means that that
91*10465441SEvalZero symbol does not occur in this code.
92*10465441SEvalZero
93*10465441SEvalZero The codes are sorted by computing a count of codes for each length,
94*10465441SEvalZero creating from that a table of starting indices for each length in the
95*10465441SEvalZero sorted table, and then entering the symbols in order in the sorted
96*10465441SEvalZero table. The sorted table is work[], with that space being provided by
97*10465441SEvalZero the caller.
98*10465441SEvalZero
99*10465441SEvalZero The length counts are used for other purposes as well, i.e. finding
100*10465441SEvalZero the minimum and maximum length codes, determining if there are any
101*10465441SEvalZero codes at all, checking for a valid set of lengths, and looking ahead
102*10465441SEvalZero at length counts to determine sub-table sizes when building the
103*10465441SEvalZero decoding tables.
104*10465441SEvalZero */
105*10465441SEvalZero
106*10465441SEvalZero /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
107*10465441SEvalZero for (len = 0; len <= MAXBITS; len++)
108*10465441SEvalZero count[len] = 0;
109*10465441SEvalZero for (sym = 0; sym < codes; sym++)
110*10465441SEvalZero count[lens[sym]]++;
111*10465441SEvalZero
112*10465441SEvalZero /* bound code lengths, force root to be within code lengths */
113*10465441SEvalZero root = *bits;
114*10465441SEvalZero for (max = MAXBITS; max >= 1; max--)
115*10465441SEvalZero if (count[max] != 0) break;
116*10465441SEvalZero if (root > max) root = max;
117*10465441SEvalZero if (max == 0) { /* no symbols to code at all */
118*10465441SEvalZero this.op = (unsigned char)64; /* invalid code marker */
119*10465441SEvalZero this.bits = (unsigned char)1;
120*10465441SEvalZero this.val = (unsigned short)0;
121*10465441SEvalZero *(*table)++ = this; /* make a table to force an error */
122*10465441SEvalZero *(*table)++ = this;
123*10465441SEvalZero *bits = 1;
124*10465441SEvalZero return 0; /* no symbols, but wait for decoding to report error */
125*10465441SEvalZero }
126*10465441SEvalZero for (min = 1; min <= MAXBITS; min++)
127*10465441SEvalZero if (count[min] != 0) break;
128*10465441SEvalZero if (root < min) root = min;
129*10465441SEvalZero
130*10465441SEvalZero /* check for an over-subscribed or incomplete set of lengths */
131*10465441SEvalZero left = 1;
132*10465441SEvalZero for (len = 1; len <= MAXBITS; len++) {
133*10465441SEvalZero left <<= 1;
134*10465441SEvalZero left -= count[len];
135*10465441SEvalZero if (left < 0) return -1; /* over-subscribed */
136*10465441SEvalZero }
137*10465441SEvalZero if (left > 0 && (type == CODES || max != 1))
138*10465441SEvalZero return -1; /* incomplete set */
139*10465441SEvalZero
140*10465441SEvalZero /* generate offsets into symbol table for each length for sorting */
141*10465441SEvalZero offs[1] = 0;
142*10465441SEvalZero for (len = 1; len < MAXBITS; len++)
143*10465441SEvalZero offs[len + 1] = offs[len] + count[len];
144*10465441SEvalZero
145*10465441SEvalZero /* sort symbols by length, by symbol order within each length */
146*10465441SEvalZero for (sym = 0; sym < codes; sym++)
147*10465441SEvalZero if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
148*10465441SEvalZero
149*10465441SEvalZero /*
150*10465441SEvalZero Create and fill in decoding tables. In this loop, the table being
151*10465441SEvalZero filled is at next and has curr index bits. The code being used is huff
152*10465441SEvalZero with length len. That code is converted to an index by dropping drop
153*10465441SEvalZero bits off of the bottom. For codes where len is less than drop + curr,
154*10465441SEvalZero those top drop + curr - len bits are incremented through all values to
155*10465441SEvalZero fill the table with replicated entries.
156*10465441SEvalZero
157*10465441SEvalZero root is the number of index bits for the root table. When len exceeds
158*10465441SEvalZero root, sub-tables are created pointed to by the root entry with an index
159*10465441SEvalZero of the low root bits of huff. This is saved in low to check for when a
160*10465441SEvalZero new sub-table should be started. drop is zero when the root table is
161*10465441SEvalZero being filled, and drop is root when sub-tables are being filled.
162*10465441SEvalZero
163*10465441SEvalZero When a new sub-table is needed, it is necessary to look ahead in the
164*10465441SEvalZero code lengths to determine what size sub-table is needed. The length
165*10465441SEvalZero counts are used for this, and so count[] is decremented as codes are
166*10465441SEvalZero entered in the tables.
167*10465441SEvalZero
168*10465441SEvalZero used keeps track of how many table entries have been allocated from the
169*10465441SEvalZero provided *table space. It is checked when a LENS table is being made
170*10465441SEvalZero against the space in *table, ENOUGH, minus the maximum space needed by
171*10465441SEvalZero the worst case distance code, MAXD. This should never happen, but the
172*10465441SEvalZero sufficiency of ENOUGH has not been proven exhaustively, hence the check.
173*10465441SEvalZero This assumes that when type == LENS, bits == 9.
174*10465441SEvalZero
175*10465441SEvalZero sym increments through all symbols, and the loop terminates when
176*10465441SEvalZero all codes of length max, i.e. all codes, have been processed. This
177*10465441SEvalZero routine permits incomplete codes, so another loop after this one fills
178*10465441SEvalZero in the rest of the decoding tables with invalid code markers.
179*10465441SEvalZero */
180*10465441SEvalZero
181*10465441SEvalZero /* set up for code type */
182*10465441SEvalZero switch (type) {
183*10465441SEvalZero case CODES:
184*10465441SEvalZero base = extra = work; /* dummy value--not used */
185*10465441SEvalZero end = 19;
186*10465441SEvalZero break;
187*10465441SEvalZero case LENS:
188*10465441SEvalZero base = lbase;
189*10465441SEvalZero base -= 257;
190*10465441SEvalZero extra = lext;
191*10465441SEvalZero extra -= 257;
192*10465441SEvalZero end = 256;
193*10465441SEvalZero break;
194*10465441SEvalZero default: /* DISTS */
195*10465441SEvalZero base = dbase;
196*10465441SEvalZero extra = dext;
197*10465441SEvalZero end = -1;
198*10465441SEvalZero }
199*10465441SEvalZero
200*10465441SEvalZero /* initialize state for loop */
201*10465441SEvalZero huff = 0; /* starting code */
202*10465441SEvalZero sym = 0; /* starting code symbol */
203*10465441SEvalZero len = min; /* starting code length */
204*10465441SEvalZero next = *table; /* current table to fill in */
205*10465441SEvalZero curr = root; /* current table index bits */
206*10465441SEvalZero drop = 0; /* current bits to drop from code for index */
207*10465441SEvalZero low = (unsigned)(-1); /* trigger new sub-table when len > root */
208*10465441SEvalZero used = 1U << root; /* use root table entries */
209*10465441SEvalZero mask = used - 1; /* mask for comparing low */
210*10465441SEvalZero
211*10465441SEvalZero /* check available table space */
212*10465441SEvalZero if (type == LENS && used >= ENOUGH - MAXD)
213*10465441SEvalZero return 1;
214*10465441SEvalZero
215*10465441SEvalZero /* process all codes and make table entries */
216*10465441SEvalZero for (;;) {
217*10465441SEvalZero /* create table entry */
218*10465441SEvalZero this.bits = (unsigned char)(len - drop);
219*10465441SEvalZero if ((int)(work[sym]) < end) {
220*10465441SEvalZero this.op = (unsigned char)0;
221*10465441SEvalZero this.val = work[sym];
222*10465441SEvalZero }
223*10465441SEvalZero else if ((int)(work[sym]) > end) {
224*10465441SEvalZero this.op = (unsigned char)(extra[work[sym]]);
225*10465441SEvalZero this.val = base[work[sym]];
226*10465441SEvalZero }
227*10465441SEvalZero else {
228*10465441SEvalZero this.op = (unsigned char)(32 + 64); /* end of block */
229*10465441SEvalZero this.val = 0;
230*10465441SEvalZero }
231*10465441SEvalZero
232*10465441SEvalZero /* replicate for those indices with low len bits equal to huff */
233*10465441SEvalZero incr = 1U << (len - drop);
234*10465441SEvalZero fill = 1U << curr;
235*10465441SEvalZero min = fill; /* save offset to next table */
236*10465441SEvalZero do {
237*10465441SEvalZero fill -= incr;
238*10465441SEvalZero next[(huff >> drop) + fill] = this;
239*10465441SEvalZero } while (fill != 0);
240*10465441SEvalZero
241*10465441SEvalZero /* backwards increment the len-bit code huff */
242*10465441SEvalZero incr = 1U << (len - 1);
243*10465441SEvalZero while (huff & incr)
244*10465441SEvalZero incr >>= 1;
245*10465441SEvalZero if (incr != 0) {
246*10465441SEvalZero huff &= incr - 1;
247*10465441SEvalZero huff += incr;
248*10465441SEvalZero }
249*10465441SEvalZero else
250*10465441SEvalZero huff = 0;
251*10465441SEvalZero
252*10465441SEvalZero /* go to next symbol, update count, len */
253*10465441SEvalZero sym++;
254*10465441SEvalZero if (--(count[len]) == 0) {
255*10465441SEvalZero if (len == max) break;
256*10465441SEvalZero len = lens[work[sym]];
257*10465441SEvalZero }
258*10465441SEvalZero
259*10465441SEvalZero /* create new sub-table if needed */
260*10465441SEvalZero if (len > root && (huff & mask) != low) {
261*10465441SEvalZero /* if first time, transition to sub-tables */
262*10465441SEvalZero if (drop == 0)
263*10465441SEvalZero drop = root;
264*10465441SEvalZero
265*10465441SEvalZero /* increment past last table */
266*10465441SEvalZero next += min; /* here min is 1 << curr */
267*10465441SEvalZero
268*10465441SEvalZero /* determine length of next table */
269*10465441SEvalZero curr = len - drop;
270*10465441SEvalZero left = (int)(1 << curr);
271*10465441SEvalZero while (curr + drop < max) {
272*10465441SEvalZero left -= count[curr + drop];
273*10465441SEvalZero if (left <= 0) break;
274*10465441SEvalZero curr++;
275*10465441SEvalZero left <<= 1;
276*10465441SEvalZero }
277*10465441SEvalZero
278*10465441SEvalZero /* check for enough space */
279*10465441SEvalZero used += 1U << curr;
280*10465441SEvalZero if (type == LENS && used >= ENOUGH - MAXD)
281*10465441SEvalZero return 1;
282*10465441SEvalZero
283*10465441SEvalZero /* point entry in root table to sub-table */
284*10465441SEvalZero low = huff & mask;
285*10465441SEvalZero (*table)[low].op = (unsigned char)curr;
286*10465441SEvalZero (*table)[low].bits = (unsigned char)root;
287*10465441SEvalZero (*table)[low].val = (unsigned short)(next - *table);
288*10465441SEvalZero }
289*10465441SEvalZero }
290*10465441SEvalZero
291*10465441SEvalZero /*
292*10465441SEvalZero Fill in rest of table for incomplete codes. This loop is similar to the
293*10465441SEvalZero loop above in incrementing huff for table indices. It is assumed that
294*10465441SEvalZero len is equal to curr + drop, so there is no loop needed to increment
295*10465441SEvalZero through high index bits. When the current sub-table is filled, the loop
296*10465441SEvalZero drops back to the root table to fill in any remaining entries there.
297*10465441SEvalZero */
298*10465441SEvalZero this.op = (unsigned char)64; /* invalid code marker */
299*10465441SEvalZero this.bits = (unsigned char)(len - drop);
300*10465441SEvalZero this.val = (unsigned short)0;
301*10465441SEvalZero while (huff != 0) {
302*10465441SEvalZero /* when done with sub-table, drop back to root table */
303*10465441SEvalZero if (drop != 0 && (huff & mask) != low) {
304*10465441SEvalZero drop = 0;
305*10465441SEvalZero len = root;
306*10465441SEvalZero next = *table;
307*10465441SEvalZero this.bits = (unsigned char)len;
308*10465441SEvalZero }
309*10465441SEvalZero
310*10465441SEvalZero /* put invalid code marker in table */
311*10465441SEvalZero next[huff >> drop] = this;
312*10465441SEvalZero
313*10465441SEvalZero /* backwards increment the len-bit code huff */
314*10465441SEvalZero incr = 1U << (len - 1);
315*10465441SEvalZero while (huff & incr)
316*10465441SEvalZero incr >>= 1;
317*10465441SEvalZero if (incr != 0) {
318*10465441SEvalZero huff &= incr - 1;
319*10465441SEvalZero huff += incr;
320*10465441SEvalZero }
321*10465441SEvalZero else
322*10465441SEvalZero huff = 0;
323*10465441SEvalZero }
324*10465441SEvalZero
325*10465441SEvalZero /* set return parameters */
326*10465441SEvalZero *table += used;
327*10465441SEvalZero *bits = root;
328*10465441SEvalZero return 0;
329*10465441SEvalZero }
330