1*0ac9a9daSXin Li
2*0ac9a9daSXin Li /*-------------------------------------------------------------*/
3*0ac9a9daSXin Li /*--- Huffman coding low-level stuff ---*/
4*0ac9a9daSXin Li /*--- huffman.c ---*/
5*0ac9a9daSXin Li /*-------------------------------------------------------------*/
6*0ac9a9daSXin Li
7*0ac9a9daSXin Li /* ------------------------------------------------------------------
8*0ac9a9daSXin Li This file is part of bzip2/libbzip2, a program and library for
9*0ac9a9daSXin Li lossless, block-sorting data compression.
10*0ac9a9daSXin Li
11*0ac9a9daSXin Li bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*0ac9a9daSXin Li Copyright (C) 1996-2019 Julian Seward <[email protected]>
13*0ac9a9daSXin Li
14*0ac9a9daSXin Li Please read the WARNING, DISCLAIMER and PATENTS sections in the
15*0ac9a9daSXin Li README file.
16*0ac9a9daSXin Li
17*0ac9a9daSXin Li This program is released under the terms of the license contained
18*0ac9a9daSXin Li in the file LICENSE.
19*0ac9a9daSXin Li ------------------------------------------------------------------ */
20*0ac9a9daSXin Li
21*0ac9a9daSXin Li
22*0ac9a9daSXin Li #include "bzlib_private.h"
23*0ac9a9daSXin Li
24*0ac9a9daSXin Li /*---------------------------------------------------*/
25*0ac9a9daSXin Li #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
26*0ac9a9daSXin Li #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
27*0ac9a9daSXin Li #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28*0ac9a9daSXin Li
29*0ac9a9daSXin Li #define ADDWEIGHTS(zw1,zw2) \
30*0ac9a9daSXin Li (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
31*0ac9a9daSXin Li (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32*0ac9a9daSXin Li
33*0ac9a9daSXin Li #define UPHEAP(z) \
34*0ac9a9daSXin Li { \
35*0ac9a9daSXin Li Int32 zz, tmp; \
36*0ac9a9daSXin Li zz = z; tmp = heap[zz]; \
37*0ac9a9daSXin Li while (weight[tmp] < weight[heap[zz >> 1]]) { \
38*0ac9a9daSXin Li heap[zz] = heap[zz >> 1]; \
39*0ac9a9daSXin Li zz >>= 1; \
40*0ac9a9daSXin Li } \
41*0ac9a9daSXin Li heap[zz] = tmp; \
42*0ac9a9daSXin Li }
43*0ac9a9daSXin Li
44*0ac9a9daSXin Li #define DOWNHEAP(z) \
45*0ac9a9daSXin Li { \
46*0ac9a9daSXin Li Int32 zz, yy, tmp; \
47*0ac9a9daSXin Li zz = z; tmp = heap[zz]; \
48*0ac9a9daSXin Li while (True) { \
49*0ac9a9daSXin Li yy = zz << 1; \
50*0ac9a9daSXin Li if (yy > nHeap) break; \
51*0ac9a9daSXin Li if (yy < nHeap && \
52*0ac9a9daSXin Li weight[heap[yy+1]] < weight[heap[yy]]) \
53*0ac9a9daSXin Li yy++; \
54*0ac9a9daSXin Li if (weight[tmp] < weight[heap[yy]]) break; \
55*0ac9a9daSXin Li heap[zz] = heap[yy]; \
56*0ac9a9daSXin Li zz = yy; \
57*0ac9a9daSXin Li } \
58*0ac9a9daSXin Li heap[zz] = tmp; \
59*0ac9a9daSXin Li }
60*0ac9a9daSXin Li
61*0ac9a9daSXin Li
62*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)63*0ac9a9daSXin Li void BZ2_hbMakeCodeLengths ( UChar *len,
64*0ac9a9daSXin Li Int32 *freq,
65*0ac9a9daSXin Li Int32 alphaSize,
66*0ac9a9daSXin Li Int32 maxLen )
67*0ac9a9daSXin Li {
68*0ac9a9daSXin Li /*--
69*0ac9a9daSXin Li Nodes and heap entries run from 1. Entry 0
70*0ac9a9daSXin Li for both the heap and nodes is a sentinel.
71*0ac9a9daSXin Li --*/
72*0ac9a9daSXin Li Int32 nNodes, nHeap, n1, n2, i, j, k;
73*0ac9a9daSXin Li Bool tooLong;
74*0ac9a9daSXin Li
75*0ac9a9daSXin Li Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
76*0ac9a9daSXin Li Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77*0ac9a9daSXin Li Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78*0ac9a9daSXin Li
79*0ac9a9daSXin Li for (i = 0; i < alphaSize; i++)
80*0ac9a9daSXin Li weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81*0ac9a9daSXin Li
82*0ac9a9daSXin Li while (True) {
83*0ac9a9daSXin Li
84*0ac9a9daSXin Li nNodes = alphaSize;
85*0ac9a9daSXin Li nHeap = 0;
86*0ac9a9daSXin Li
87*0ac9a9daSXin Li heap[0] = 0;
88*0ac9a9daSXin Li weight[0] = 0;
89*0ac9a9daSXin Li parent[0] = -2;
90*0ac9a9daSXin Li
91*0ac9a9daSXin Li for (i = 1; i <= alphaSize; i++) {
92*0ac9a9daSXin Li parent[i] = -1;
93*0ac9a9daSXin Li nHeap++;
94*0ac9a9daSXin Li heap[nHeap] = i;
95*0ac9a9daSXin Li UPHEAP(nHeap);
96*0ac9a9daSXin Li }
97*0ac9a9daSXin Li
98*0ac9a9daSXin Li AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99*0ac9a9daSXin Li
100*0ac9a9daSXin Li while (nHeap > 1) {
101*0ac9a9daSXin Li n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102*0ac9a9daSXin Li n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103*0ac9a9daSXin Li nNodes++;
104*0ac9a9daSXin Li parent[n1] = parent[n2] = nNodes;
105*0ac9a9daSXin Li weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106*0ac9a9daSXin Li parent[nNodes] = -1;
107*0ac9a9daSXin Li nHeap++;
108*0ac9a9daSXin Li heap[nHeap] = nNodes;
109*0ac9a9daSXin Li UPHEAP(nHeap);
110*0ac9a9daSXin Li }
111*0ac9a9daSXin Li
112*0ac9a9daSXin Li AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113*0ac9a9daSXin Li
114*0ac9a9daSXin Li tooLong = False;
115*0ac9a9daSXin Li for (i = 1; i <= alphaSize; i++) {
116*0ac9a9daSXin Li j = 0;
117*0ac9a9daSXin Li k = i;
118*0ac9a9daSXin Li while (parent[k] >= 0) { k = parent[k]; j++; }
119*0ac9a9daSXin Li len[i-1] = j;
120*0ac9a9daSXin Li if (j > maxLen) tooLong = True;
121*0ac9a9daSXin Li }
122*0ac9a9daSXin Li
123*0ac9a9daSXin Li if (! tooLong) break;
124*0ac9a9daSXin Li
125*0ac9a9daSXin Li /* 17 Oct 04: keep-going condition for the following loop used
126*0ac9a9daSXin Li to be 'i < alphaSize', which missed the last element,
127*0ac9a9daSXin Li theoretically leading to the possibility of the compressor
128*0ac9a9daSXin Li looping. However, this count-scaling step is only needed if
129*0ac9a9daSXin Li one of the generated Huffman code words is longer than
130*0ac9a9daSXin Li maxLen, which up to and including version 1.0.2 was 20 bits,
131*0ac9a9daSXin Li which is extremely unlikely. In version 1.0.3 maxLen was
132*0ac9a9daSXin Li changed to 17 bits, which has minimal effect on compression
133*0ac9a9daSXin Li ratio, but does mean this scaling step is used from time to
134*0ac9a9daSXin Li time, enough to verify that it works.
135*0ac9a9daSXin Li
136*0ac9a9daSXin Li This means that bzip2-1.0.3 and later will only produce
137*0ac9a9daSXin Li Huffman codes with a maximum length of 17 bits. However, in
138*0ac9a9daSXin Li order to preserve backwards compatibility with bitstreams
139*0ac9a9daSXin Li produced by versions pre-1.0.3, the decompressor must still
140*0ac9a9daSXin Li handle lengths of up to 20. */
141*0ac9a9daSXin Li
142*0ac9a9daSXin Li for (i = 1; i <= alphaSize; i++) {
143*0ac9a9daSXin Li j = weight[i] >> 8;
144*0ac9a9daSXin Li j = 1 + (j / 2);
145*0ac9a9daSXin Li weight[i] = j << 8;
146*0ac9a9daSXin Li }
147*0ac9a9daSXin Li }
148*0ac9a9daSXin Li }
149*0ac9a9daSXin Li
150*0ac9a9daSXin Li
151*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)152*0ac9a9daSXin Li void BZ2_hbAssignCodes ( Int32 *code,
153*0ac9a9daSXin Li UChar *length,
154*0ac9a9daSXin Li Int32 minLen,
155*0ac9a9daSXin Li Int32 maxLen,
156*0ac9a9daSXin Li Int32 alphaSize )
157*0ac9a9daSXin Li {
158*0ac9a9daSXin Li Int32 n, vec, i;
159*0ac9a9daSXin Li
160*0ac9a9daSXin Li vec = 0;
161*0ac9a9daSXin Li for (n = minLen; n <= maxLen; n++) {
162*0ac9a9daSXin Li for (i = 0; i < alphaSize; i++)
163*0ac9a9daSXin Li if (length[i] == n) { code[i] = vec; vec++; };
164*0ac9a9daSXin Li vec <<= 1;
165*0ac9a9daSXin Li }
166*0ac9a9daSXin Li }
167*0ac9a9daSXin Li
168*0ac9a9daSXin Li
169*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)170*0ac9a9daSXin Li void BZ2_hbCreateDecodeTables ( Int32 *limit,
171*0ac9a9daSXin Li Int32 *base,
172*0ac9a9daSXin Li Int32 *perm,
173*0ac9a9daSXin Li UChar *length,
174*0ac9a9daSXin Li Int32 minLen,
175*0ac9a9daSXin Li Int32 maxLen,
176*0ac9a9daSXin Li Int32 alphaSize )
177*0ac9a9daSXin Li {
178*0ac9a9daSXin Li Int32 pp, i, j, vec;
179*0ac9a9daSXin Li
180*0ac9a9daSXin Li pp = 0;
181*0ac9a9daSXin Li for (i = minLen; i <= maxLen; i++)
182*0ac9a9daSXin Li for (j = 0; j < alphaSize; j++)
183*0ac9a9daSXin Li if (length[j] == i) { perm[pp] = j; pp++; };
184*0ac9a9daSXin Li
185*0ac9a9daSXin Li for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186*0ac9a9daSXin Li for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187*0ac9a9daSXin Li
188*0ac9a9daSXin Li for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189*0ac9a9daSXin Li
190*0ac9a9daSXin Li for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191*0ac9a9daSXin Li vec = 0;
192*0ac9a9daSXin Li
193*0ac9a9daSXin Li for (i = minLen; i <= maxLen; i++) {
194*0ac9a9daSXin Li vec += (base[i+1] - base[i]);
195*0ac9a9daSXin Li limit[i] = vec-1;
196*0ac9a9daSXin Li vec <<= 1;
197*0ac9a9daSXin Li }
198*0ac9a9daSXin Li for (i = minLen + 1; i <= maxLen; i++)
199*0ac9a9daSXin Li base[i] = ((limit[i-1] + 1) << 1) - base[i];
200*0ac9a9daSXin Li }
201*0ac9a9daSXin Li
202*0ac9a9daSXin Li
203*0ac9a9daSXin Li /*-------------------------------------------------------------*/
204*0ac9a9daSXin Li /*--- end huffman.c ---*/
205*0ac9a9daSXin Li /*-------------------------------------------------------------*/
206