1*10465441SEvalZero /* infblock.c -- interpret and process block types to last block
2*10465441SEvalZero * Copyright (C) 1995-2002 Mark Adler
3*10465441SEvalZero * For conditions of distribution and use, see copyright notice in zlib.h
4*10465441SEvalZero */
5*10465441SEvalZero
6*10465441SEvalZero #include "zutil.h"
7*10465441SEvalZero #include "infblock.h"
8*10465441SEvalZero #include "inftrees.h"
9*10465441SEvalZero //#include "infcodes.h" //no such file!
10*10465441SEvalZero #include "infutil.h"
11*10465441SEvalZero
12*10465441SEvalZero struct inflate_codes_state {int dummy;}; /* for buggy compilers */
13*10465441SEvalZero
14*10465441SEvalZero /* simplify the use of the inflate_huft type with some defines */
15*10465441SEvalZero #define exop word.what.Exop
16*10465441SEvalZero #define bits word.what.Bits
17*10465441SEvalZero
18*10465441SEvalZero /* Table for deflate from PKZIP's appnote.txt. */
19*10465441SEvalZero local const uInt border[] = { /* Order of the bit length code lengths */
20*10465441SEvalZero 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
21*10465441SEvalZero
22*10465441SEvalZero /*
23*10465441SEvalZero Notes beyond the 1.93a appnote.txt:
24*10465441SEvalZero
25*10465441SEvalZero 1. Distance pointers never point before the beginning of the output
26*10465441SEvalZero stream.
27*10465441SEvalZero 2. Distance pointers can point back across blocks, up to 32k away.
28*10465441SEvalZero 3. There is an implied maximum of 7 bits for the bit length table and
29*10465441SEvalZero 15 bits for the actual data.
30*10465441SEvalZero 4. If only one code exists, then it is encoded using one bit. (Zero
31*10465441SEvalZero would be more efficient, but perhaps a little confusing.) If two
32*10465441SEvalZero codes exist, they are coded using one bit each (0 and 1).
33*10465441SEvalZero 5. There is no way of sending zero distance codes--a dummy must be
34*10465441SEvalZero sent if there are none. (History: a pre 2.0 version of PKZIP would
35*10465441SEvalZero store blocks with no distance codes, but this was discovered to be
36*10465441SEvalZero too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
37*10465441SEvalZero zero distance codes, which is sent as one code of zero bits in
38*10465441SEvalZero length.
39*10465441SEvalZero 6. There are up to 286 literal/length codes. Code 256 represents the
40*10465441SEvalZero end-of-block. Note however that the static length tree defines
41*10465441SEvalZero 288 codes just to fill out the Huffman codes. Codes 286 and 287
42*10465441SEvalZero cannot be used though, since there is no length base or extra bits
43*10465441SEvalZero defined for them. Similarily, there are up to 30 distance codes.
44*10465441SEvalZero However, static trees define 32 codes (all 5 bits) to fill out the
45*10465441SEvalZero Huffman codes, but the last two had better not show up in the data.
46*10465441SEvalZero 7. Unzip can check dynamic Huffman blocks for complete code sets.
47*10465441SEvalZero The exception is that a single code would not be complete (see #4).
48*10465441SEvalZero 8. The five bits following the block type is really the number of
49*10465441SEvalZero literal codes sent minus 257.
50*10465441SEvalZero 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
51*10465441SEvalZero (1+6+6). Therefore, to output three times the length, you output
52*10465441SEvalZero three codes (1+1+1), whereas to output four times the same length,
53*10465441SEvalZero you only need two codes (1+3). Hmm.
54*10465441SEvalZero 10. In the tree reconstruction algorithm, Code = Code + Increment
55*10465441SEvalZero only if BitLength(i) is not zero. (Pretty obvious.)
56*10465441SEvalZero 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
57*10465441SEvalZero 12. Note: length code 284 can represent 227-258, but length code 285
58*10465441SEvalZero really is 258. The last length deserves its own, short code
59*10465441SEvalZero since it gets used a lot in very redundant files. The length
60*10465441SEvalZero 258 is special since 258 - 3 (the min match length) is 255.
61*10465441SEvalZero 13. The literal/length and distance code bit lengths are read as a
62*10465441SEvalZero single stream of lengths. It is possible (and advantageous) for
63*10465441SEvalZero a repeat code (16, 17, or 18) to go across the boundary between
64*10465441SEvalZero the two sets of lengths.
65*10465441SEvalZero */
66*10465441SEvalZero
67*10465441SEvalZero
inflate_blocks_reset(s,z,c)68*10465441SEvalZero void inflate_blocks_reset(s, z, c)
69*10465441SEvalZero inflate_blocks_statef *s;
70*10465441SEvalZero z_streamp z;
71*10465441SEvalZero uLongf *c;
72*10465441SEvalZero {
73*10465441SEvalZero if (c != Z_NULL)
74*10465441SEvalZero *c = s->check;
75*10465441SEvalZero if (s->mode == BTREE || s->mode == DTREE)
76*10465441SEvalZero ZFREE(z, s->sub.trees.blens);
77*10465441SEvalZero if (s->mode == CODES)
78*10465441SEvalZero inflate_codes_free(s->sub.decode.codes, z);
79*10465441SEvalZero s->mode = TYPE;
80*10465441SEvalZero s->bitk = 0;
81*10465441SEvalZero s->bitb = 0;
82*10465441SEvalZero s->read = s->write = s->window;
83*10465441SEvalZero if (s->checkfn != Z_NULL)
84*10465441SEvalZero z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0);
85*10465441SEvalZero Tracev((stderr, "inflate: blocks reset\n"));
86*10465441SEvalZero }
87*10465441SEvalZero
88*10465441SEvalZero
inflate_blocks_new(z,c,w)89*10465441SEvalZero inflate_blocks_statef *inflate_blocks_new(z, c, w)
90*10465441SEvalZero z_streamp z;
91*10465441SEvalZero check_func c;
92*10465441SEvalZero uInt w;
93*10465441SEvalZero {
94*10465441SEvalZero inflate_blocks_statef *s;
95*10465441SEvalZero
96*10465441SEvalZero if ((s = (inflate_blocks_statef *)ZALLOC
97*10465441SEvalZero (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL)
98*10465441SEvalZero return s;
99*10465441SEvalZero if ((s->hufts =
100*10465441SEvalZero (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY)) == Z_NULL)
101*10465441SEvalZero {
102*10465441SEvalZero ZFREE(z, s);
103*10465441SEvalZero return Z_NULL;
104*10465441SEvalZero }
105*10465441SEvalZero if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL)
106*10465441SEvalZero {
107*10465441SEvalZero ZFREE(z, s->hufts);
108*10465441SEvalZero ZFREE(z, s);
109*10465441SEvalZero return Z_NULL;
110*10465441SEvalZero }
111*10465441SEvalZero s->end = s->window + w;
112*10465441SEvalZero s->checkfn = c;
113*10465441SEvalZero s->mode = TYPE;
114*10465441SEvalZero Tracev((stderr, "inflate: blocks allocated\n"));
115*10465441SEvalZero inflate_blocks_reset(s, z, Z_NULL);
116*10465441SEvalZero return s;
117*10465441SEvalZero }
118*10465441SEvalZero
119*10465441SEvalZero
inflate_blocks(s,z,r)120*10465441SEvalZero int inflate_blocks(s, z, r)
121*10465441SEvalZero inflate_blocks_statef *s;
122*10465441SEvalZero z_streamp z;
123*10465441SEvalZero int r;
124*10465441SEvalZero {
125*10465441SEvalZero uInt t; /* temporary storage */
126*10465441SEvalZero uLong b; /* bit buffer */
127*10465441SEvalZero uInt k; /* bits in bit buffer */
128*10465441SEvalZero Bytef *p; /* input data pointer */
129*10465441SEvalZero uInt n; /* bytes available there */
130*10465441SEvalZero Bytef *q; /* output window write pointer */
131*10465441SEvalZero uInt m; /* bytes to end of window or read pointer */
132*10465441SEvalZero
133*10465441SEvalZero /* copy input/output information to locals (UPDATE macro restores) */
134*10465441SEvalZero LOAD
135*10465441SEvalZero
136*10465441SEvalZero /* process input based on current state */
137*10465441SEvalZero while (1) switch (s->mode)
138*10465441SEvalZero {
139*10465441SEvalZero case TYPE:
140*10465441SEvalZero NEEDBITS(3)
141*10465441SEvalZero t = (uInt)b & 7;
142*10465441SEvalZero s->last = t & 1;
143*10465441SEvalZero switch (t >> 1)
144*10465441SEvalZero {
145*10465441SEvalZero case 0: /* stored */
146*10465441SEvalZero Tracev((stderr, "inflate: stored block%s\n",
147*10465441SEvalZero s->last ? " (last)" : ""));
148*10465441SEvalZero DUMPBITS(3)
149*10465441SEvalZero t = k & 7; /* go to byte boundary */
150*10465441SEvalZero DUMPBITS(t)
151*10465441SEvalZero s->mode = LENS; /* get length of stored block */
152*10465441SEvalZero break;
153*10465441SEvalZero case 1: /* fixed */
154*10465441SEvalZero Tracev((stderr, "inflate: fixed codes block%s\n",
155*10465441SEvalZero s->last ? " (last)" : ""));
156*10465441SEvalZero {
157*10465441SEvalZero uInt bl, bd;
158*10465441SEvalZero inflate_huft *tl, *td;
159*10465441SEvalZero
160*10465441SEvalZero inflate_trees_fixed(&bl, &bd, &tl, &td, z);
161*10465441SEvalZero s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z);
162*10465441SEvalZero if (s->sub.decode.codes == Z_NULL)
163*10465441SEvalZero {
164*10465441SEvalZero r = Z_MEM_ERROR;
165*10465441SEvalZero LEAVE
166*10465441SEvalZero }
167*10465441SEvalZero }
168*10465441SEvalZero DUMPBITS(3)
169*10465441SEvalZero s->mode = CODES;
170*10465441SEvalZero break;
171*10465441SEvalZero case 2: /* dynamic */
172*10465441SEvalZero Tracev((stderr, "inflate: dynamic codes block%s\n",
173*10465441SEvalZero s->last ? " (last)" : ""));
174*10465441SEvalZero DUMPBITS(3)
175*10465441SEvalZero s->mode = TABLE;
176*10465441SEvalZero break;
177*10465441SEvalZero case 3: /* illegal */
178*10465441SEvalZero DUMPBITS(3)
179*10465441SEvalZero s->mode = BAD;
180*10465441SEvalZero z->msg = (char*)"invalid block type";
181*10465441SEvalZero r = Z_DATA_ERROR;
182*10465441SEvalZero LEAVE
183*10465441SEvalZero }
184*10465441SEvalZero break;
185*10465441SEvalZero case LENS:
186*10465441SEvalZero NEEDBITS(32)
187*10465441SEvalZero if ((((~b) >> 16) & 0xffff) != (b & 0xffff))
188*10465441SEvalZero {
189*10465441SEvalZero s->mode = BAD;
190*10465441SEvalZero z->msg = (char*)"invalid stored block lengths";
191*10465441SEvalZero r = Z_DATA_ERROR;
192*10465441SEvalZero LEAVE
193*10465441SEvalZero }
194*10465441SEvalZero s->sub.left = (uInt)b & 0xffff;
195*10465441SEvalZero b = k = 0; /* dump bits */
196*10465441SEvalZero Tracev((stderr, "inflate: stored length %u\n", s->sub.left));
197*10465441SEvalZero s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE);
198*10465441SEvalZero break;
199*10465441SEvalZero case STORED:
200*10465441SEvalZero if (n == 0)
201*10465441SEvalZero LEAVE
202*10465441SEvalZero NEEDOUT
203*10465441SEvalZero t = s->sub.left;
204*10465441SEvalZero if (t > n) t = n;
205*10465441SEvalZero if (t > m) t = m;
206*10465441SEvalZero zmemcpy(q, p, t);
207*10465441SEvalZero p += t; n -= t;
208*10465441SEvalZero q += t; m -= t;
209*10465441SEvalZero if ((s->sub.left -= t) != 0)
210*10465441SEvalZero break;
211*10465441SEvalZero Tracev((stderr, "inflate: stored end, %lu total out\n",
212*10465441SEvalZero z->total_out + (q >= s->read ? q - s->read :
213*10465441SEvalZero (s->end - s->read) + (q - s->window))));
214*10465441SEvalZero s->mode = s->last ? DRY : TYPE;
215*10465441SEvalZero break;
216*10465441SEvalZero case TABLE:
217*10465441SEvalZero NEEDBITS(14)
218*10465441SEvalZero s->sub.trees.table = t = (uInt)b & 0x3fff;
219*10465441SEvalZero #ifndef PKZIP_BUG_WORKAROUND
220*10465441SEvalZero if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
221*10465441SEvalZero {
222*10465441SEvalZero s->mode = BAD;
223*10465441SEvalZero z->msg = (char*)"too many length or distance symbols";
224*10465441SEvalZero r = Z_DATA_ERROR;
225*10465441SEvalZero LEAVE
226*10465441SEvalZero }
227*10465441SEvalZero #endif
228*10465441SEvalZero t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f);
229*10465441SEvalZero if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL)
230*10465441SEvalZero {
231*10465441SEvalZero r = Z_MEM_ERROR;
232*10465441SEvalZero LEAVE
233*10465441SEvalZero }
234*10465441SEvalZero DUMPBITS(14)
235*10465441SEvalZero s->sub.trees.index = 0;
236*10465441SEvalZero Tracev((stderr, "inflate: table sizes ok\n"));
237*10465441SEvalZero s->mode = BTREE;
238*10465441SEvalZero case BTREE:
239*10465441SEvalZero while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
240*10465441SEvalZero {
241*10465441SEvalZero NEEDBITS(3)
242*10465441SEvalZero s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7;
243*10465441SEvalZero DUMPBITS(3)
244*10465441SEvalZero }
245*10465441SEvalZero while (s->sub.trees.index < 19)
246*10465441SEvalZero s->sub.trees.blens[border[s->sub.trees.index++]] = 0;
247*10465441SEvalZero s->sub.trees.bb = 7;
248*10465441SEvalZero t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb,
249*10465441SEvalZero &s->sub.trees.tb, s->hufts, z);
250*10465441SEvalZero if (t != Z_OK)
251*10465441SEvalZero {
252*10465441SEvalZero r = t;
253*10465441SEvalZero if (r == Z_DATA_ERROR)
254*10465441SEvalZero {
255*10465441SEvalZero ZFREE(z, s->sub.trees.blens);
256*10465441SEvalZero s->mode = BAD;
257*10465441SEvalZero }
258*10465441SEvalZero LEAVE
259*10465441SEvalZero }
260*10465441SEvalZero s->sub.trees.index = 0;
261*10465441SEvalZero Tracev((stderr, "inflate: bits tree ok\n"));
262*10465441SEvalZero s->mode = DTREE;
263*10465441SEvalZero case DTREE:
264*10465441SEvalZero while (t = s->sub.trees.table,
265*10465441SEvalZero s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
266*10465441SEvalZero {
267*10465441SEvalZero inflate_huft *h;
268*10465441SEvalZero uInt i, j, c;
269*10465441SEvalZero
270*10465441SEvalZero t = s->sub.trees.bb;
271*10465441SEvalZero NEEDBITS(t)
272*10465441SEvalZero h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]);
273*10465441SEvalZero t = h->bits;
274*10465441SEvalZero c = h->base;
275*10465441SEvalZero if (c < 16)
276*10465441SEvalZero {
277*10465441SEvalZero DUMPBITS(t)
278*10465441SEvalZero s->sub.trees.blens[s->sub.trees.index++] = c;
279*10465441SEvalZero }
280*10465441SEvalZero else /* c == 16..18 */
281*10465441SEvalZero {
282*10465441SEvalZero i = c == 18 ? 7 : c - 14;
283*10465441SEvalZero j = c == 18 ? 11 : 3;
284*10465441SEvalZero NEEDBITS(t + i)
285*10465441SEvalZero DUMPBITS(t)
286*10465441SEvalZero j += (uInt)b & inflate_mask[i];
287*10465441SEvalZero DUMPBITS(i)
288*10465441SEvalZero i = s->sub.trees.index;
289*10465441SEvalZero t = s->sub.trees.table;
290*10465441SEvalZero if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
291*10465441SEvalZero (c == 16 && i < 1))
292*10465441SEvalZero {
293*10465441SEvalZero ZFREE(z, s->sub.trees.blens);
294*10465441SEvalZero s->mode = BAD;
295*10465441SEvalZero z->msg = (char*)"invalid bit length repeat";
296*10465441SEvalZero r = Z_DATA_ERROR;
297*10465441SEvalZero LEAVE
298*10465441SEvalZero }
299*10465441SEvalZero c = c == 16 ? s->sub.trees.blens[i - 1] : 0;
300*10465441SEvalZero do {
301*10465441SEvalZero s->sub.trees.blens[i++] = c;
302*10465441SEvalZero } while (--j);
303*10465441SEvalZero s->sub.trees.index = i;
304*10465441SEvalZero }
305*10465441SEvalZero }
306*10465441SEvalZero s->sub.trees.tb = Z_NULL;
307*10465441SEvalZero {
308*10465441SEvalZero uInt bl, bd;
309*10465441SEvalZero inflate_huft *tl, *td;
310*10465441SEvalZero inflate_codes_statef *c;
311*10465441SEvalZero
312*10465441SEvalZero bl = 9; /* must be <= 9 for lookahead assumptions */
313*10465441SEvalZero bd = 6; /* must be <= 9 for lookahead assumptions */
314*10465441SEvalZero t = s->sub.trees.table;
315*10465441SEvalZero t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f),
316*10465441SEvalZero s->sub.trees.blens, &bl, &bd, &tl, &td,
317*10465441SEvalZero s->hufts, z);
318*10465441SEvalZero if (t != Z_OK)
319*10465441SEvalZero {
320*10465441SEvalZero if (t == (uInt)Z_DATA_ERROR)
321*10465441SEvalZero {
322*10465441SEvalZero ZFREE(z, s->sub.trees.blens);
323*10465441SEvalZero s->mode = BAD;
324*10465441SEvalZero }
325*10465441SEvalZero r = t;
326*10465441SEvalZero LEAVE
327*10465441SEvalZero }
328*10465441SEvalZero Tracev((stderr, "inflate: trees ok\n"));
329*10465441SEvalZero if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL)
330*10465441SEvalZero {
331*10465441SEvalZero r = Z_MEM_ERROR;
332*10465441SEvalZero LEAVE
333*10465441SEvalZero }
334*10465441SEvalZero s->sub.decode.codes = c;
335*10465441SEvalZero }
336*10465441SEvalZero ZFREE(z, s->sub.trees.blens);
337*10465441SEvalZero s->mode = CODES;
338*10465441SEvalZero case CODES:
339*10465441SEvalZero UPDATE
340*10465441SEvalZero if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
341*10465441SEvalZero return inflate_flush(s, z, r);
342*10465441SEvalZero r = Z_OK;
343*10465441SEvalZero inflate_codes_free(s->sub.decode.codes, z);
344*10465441SEvalZero LOAD
345*10465441SEvalZero Tracev((stderr, "inflate: codes end, %lu total out\n",
346*10465441SEvalZero z->total_out + (q >= s->read ? q - s->read :
347*10465441SEvalZero (s->end - s->read) + (q - s->window))));
348*10465441SEvalZero if (!s->last)
349*10465441SEvalZero {
350*10465441SEvalZero s->mode = TYPE;
351*10465441SEvalZero break;
352*10465441SEvalZero }
353*10465441SEvalZero s->mode = DRY;
354*10465441SEvalZero case DRY:
355*10465441SEvalZero FLUSH
356*10465441SEvalZero if (s->read != s->write)
357*10465441SEvalZero LEAVE
358*10465441SEvalZero s->mode = DONE;
359*10465441SEvalZero case DONE:
360*10465441SEvalZero r = Z_STREAM_END;
361*10465441SEvalZero LEAVE
362*10465441SEvalZero case BAD:
363*10465441SEvalZero r = Z_DATA_ERROR;
364*10465441SEvalZero LEAVE
365*10465441SEvalZero default:
366*10465441SEvalZero r = Z_STREAM_ERROR;
367*10465441SEvalZero LEAVE
368*10465441SEvalZero }
369*10465441SEvalZero }
370*10465441SEvalZero
371*10465441SEvalZero
inflate_blocks_free(s,z)372*10465441SEvalZero int inflate_blocks_free(s, z)
373*10465441SEvalZero inflate_blocks_statef *s;
374*10465441SEvalZero z_streamp z;
375*10465441SEvalZero {
376*10465441SEvalZero inflate_blocks_reset(s, z, Z_NULL);
377*10465441SEvalZero ZFREE(z, s->window);
378*10465441SEvalZero ZFREE(z, s->hufts);
379*10465441SEvalZero ZFREE(z, s);
380*10465441SEvalZero Tracev((stderr, "inflate: blocks freed\n"));
381*10465441SEvalZero return Z_OK;
382*10465441SEvalZero }
383*10465441SEvalZero
384*10465441SEvalZero
inflate_set_dictionary(s,d,n)385*10465441SEvalZero void inflate_set_dictionary(s, d, n)
386*10465441SEvalZero inflate_blocks_statef *s;
387*10465441SEvalZero const Bytef *d;
388*10465441SEvalZero uInt n;
389*10465441SEvalZero {
390*10465441SEvalZero zmemcpy(s->window, d, n);
391*10465441SEvalZero s->read = s->write = s->window + n;
392*10465441SEvalZero }
393*10465441SEvalZero
394*10465441SEvalZero
395*10465441SEvalZero /* Returns true if inflate is currently at the end of a block generated
396*10465441SEvalZero * by Z_SYNC_FLUSH or Z_FULL_FLUSH.
397*10465441SEvalZero * IN assertion: s != Z_NULL
398*10465441SEvalZero */
inflate_blocks_sync_point(s)399*10465441SEvalZero int inflate_blocks_sync_point(s)
400*10465441SEvalZero inflate_blocks_statef *s;
401*10465441SEvalZero {
402*10465441SEvalZero return s->mode == LENS;
403*10465441SEvalZero }
404