xref: /aosp_15_r20/external/bzip2/huffman.c (revision 0ac9a9daea5cce2e775d5da949508593e2ee9206)
1*0ac9a9daSXin Li 
2*0ac9a9daSXin Li /*-------------------------------------------------------------*/
3*0ac9a9daSXin Li /*--- Huffman coding low-level stuff                        ---*/
4*0ac9a9daSXin Li /*---                                             huffman.c ---*/
5*0ac9a9daSXin Li /*-------------------------------------------------------------*/
6*0ac9a9daSXin Li 
7*0ac9a9daSXin Li /* ------------------------------------------------------------------
8*0ac9a9daSXin Li    This file is part of bzip2/libbzip2, a program and library for
9*0ac9a9daSXin Li    lossless, block-sorting data compression.
10*0ac9a9daSXin Li 
11*0ac9a9daSXin Li    bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*0ac9a9daSXin Li    Copyright (C) 1996-2019 Julian Seward <[email protected]>
13*0ac9a9daSXin Li 
14*0ac9a9daSXin Li    Please read the WARNING, DISCLAIMER and PATENTS sections in the
15*0ac9a9daSXin Li    README file.
16*0ac9a9daSXin Li 
17*0ac9a9daSXin Li    This program is released under the terms of the license contained
18*0ac9a9daSXin Li    in the file LICENSE.
19*0ac9a9daSXin Li    ------------------------------------------------------------------ */
20*0ac9a9daSXin Li 
21*0ac9a9daSXin Li 
22*0ac9a9daSXin Li #include "bzlib_private.h"
23*0ac9a9daSXin Li 
24*0ac9a9daSXin Li /*---------------------------------------------------*/
25*0ac9a9daSXin Li #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
26*0ac9a9daSXin Li #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
27*0ac9a9daSXin Li #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28*0ac9a9daSXin Li 
29*0ac9a9daSXin Li #define ADDWEIGHTS(zw1,zw2)                           \
30*0ac9a9daSXin Li    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
31*0ac9a9daSXin Li    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32*0ac9a9daSXin Li 
33*0ac9a9daSXin Li #define UPHEAP(z)                                     \
34*0ac9a9daSXin Li {                                                     \
35*0ac9a9daSXin Li    Int32 zz, tmp;                                     \
36*0ac9a9daSXin Li    zz = z; tmp = heap[zz];                            \
37*0ac9a9daSXin Li    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
38*0ac9a9daSXin Li       heap[zz] = heap[zz >> 1];                       \
39*0ac9a9daSXin Li       zz >>= 1;                                       \
40*0ac9a9daSXin Li    }                                                  \
41*0ac9a9daSXin Li    heap[zz] = tmp;                                    \
42*0ac9a9daSXin Li }
43*0ac9a9daSXin Li 
44*0ac9a9daSXin Li #define DOWNHEAP(z)                                   \
45*0ac9a9daSXin Li {                                                     \
46*0ac9a9daSXin Li    Int32 zz, yy, tmp;                                 \
47*0ac9a9daSXin Li    zz = z; tmp = heap[zz];                            \
48*0ac9a9daSXin Li    while (True) {                                     \
49*0ac9a9daSXin Li       yy = zz << 1;                                   \
50*0ac9a9daSXin Li       if (yy > nHeap) break;                          \
51*0ac9a9daSXin Li       if (yy < nHeap &&                               \
52*0ac9a9daSXin Li           weight[heap[yy+1]] < weight[heap[yy]])      \
53*0ac9a9daSXin Li          yy++;                                        \
54*0ac9a9daSXin Li       if (weight[tmp] < weight[heap[yy]]) break;      \
55*0ac9a9daSXin Li       heap[zz] = heap[yy];                            \
56*0ac9a9daSXin Li       zz = yy;                                        \
57*0ac9a9daSXin Li    }                                                  \
58*0ac9a9daSXin Li    heap[zz] = tmp;                                    \
59*0ac9a9daSXin Li }
60*0ac9a9daSXin Li 
61*0ac9a9daSXin Li 
62*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)63*0ac9a9daSXin Li void BZ2_hbMakeCodeLengths ( UChar *len,
64*0ac9a9daSXin Li                              Int32 *freq,
65*0ac9a9daSXin Li                              Int32 alphaSize,
66*0ac9a9daSXin Li                              Int32 maxLen )
67*0ac9a9daSXin Li {
68*0ac9a9daSXin Li    /*--
69*0ac9a9daSXin Li       Nodes and heap entries run from 1.  Entry 0
70*0ac9a9daSXin Li       for both the heap and nodes is a sentinel.
71*0ac9a9daSXin Li    --*/
72*0ac9a9daSXin Li    Int32 nNodes, nHeap, n1, n2, i, j, k;
73*0ac9a9daSXin Li    Bool  tooLong;
74*0ac9a9daSXin Li 
75*0ac9a9daSXin Li    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
76*0ac9a9daSXin Li    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77*0ac9a9daSXin Li    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78*0ac9a9daSXin Li 
79*0ac9a9daSXin Li    for (i = 0; i < alphaSize; i++)
80*0ac9a9daSXin Li       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81*0ac9a9daSXin Li 
82*0ac9a9daSXin Li    while (True) {
83*0ac9a9daSXin Li 
84*0ac9a9daSXin Li       nNodes = alphaSize;
85*0ac9a9daSXin Li       nHeap = 0;
86*0ac9a9daSXin Li 
87*0ac9a9daSXin Li       heap[0] = 0;
88*0ac9a9daSXin Li       weight[0] = 0;
89*0ac9a9daSXin Li       parent[0] = -2;
90*0ac9a9daSXin Li 
91*0ac9a9daSXin Li       for (i = 1; i <= alphaSize; i++) {
92*0ac9a9daSXin Li          parent[i] = -1;
93*0ac9a9daSXin Li          nHeap++;
94*0ac9a9daSXin Li          heap[nHeap] = i;
95*0ac9a9daSXin Li          UPHEAP(nHeap);
96*0ac9a9daSXin Li       }
97*0ac9a9daSXin Li 
98*0ac9a9daSXin Li       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99*0ac9a9daSXin Li 
100*0ac9a9daSXin Li       while (nHeap > 1) {
101*0ac9a9daSXin Li          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102*0ac9a9daSXin Li          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103*0ac9a9daSXin Li          nNodes++;
104*0ac9a9daSXin Li          parent[n1] = parent[n2] = nNodes;
105*0ac9a9daSXin Li          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106*0ac9a9daSXin Li          parent[nNodes] = -1;
107*0ac9a9daSXin Li          nHeap++;
108*0ac9a9daSXin Li          heap[nHeap] = nNodes;
109*0ac9a9daSXin Li          UPHEAP(nHeap);
110*0ac9a9daSXin Li       }
111*0ac9a9daSXin Li 
112*0ac9a9daSXin Li       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113*0ac9a9daSXin Li 
114*0ac9a9daSXin Li       tooLong = False;
115*0ac9a9daSXin Li       for (i = 1; i <= alphaSize; i++) {
116*0ac9a9daSXin Li          j = 0;
117*0ac9a9daSXin Li          k = i;
118*0ac9a9daSXin Li          while (parent[k] >= 0) { k = parent[k]; j++; }
119*0ac9a9daSXin Li          len[i-1] = j;
120*0ac9a9daSXin Li          if (j > maxLen) tooLong = True;
121*0ac9a9daSXin Li       }
122*0ac9a9daSXin Li 
123*0ac9a9daSXin Li       if (! tooLong) break;
124*0ac9a9daSXin Li 
125*0ac9a9daSXin Li       /* 17 Oct 04: keep-going condition for the following loop used
126*0ac9a9daSXin Li          to be 'i < alphaSize', which missed the last element,
127*0ac9a9daSXin Li          theoretically leading to the possibility of the compressor
128*0ac9a9daSXin Li          looping.  However, this count-scaling step is only needed if
129*0ac9a9daSXin Li          one of the generated Huffman code words is longer than
130*0ac9a9daSXin Li          maxLen, which up to and including version 1.0.2 was 20 bits,
131*0ac9a9daSXin Li          which is extremely unlikely.  In version 1.0.3 maxLen was
132*0ac9a9daSXin Li          changed to 17 bits, which has minimal effect on compression
133*0ac9a9daSXin Li          ratio, but does mean this scaling step is used from time to
134*0ac9a9daSXin Li          time, enough to verify that it works.
135*0ac9a9daSXin Li 
136*0ac9a9daSXin Li          This means that bzip2-1.0.3 and later will only produce
137*0ac9a9daSXin Li          Huffman codes with a maximum length of 17 bits.  However, in
138*0ac9a9daSXin Li          order to preserve backwards compatibility with bitstreams
139*0ac9a9daSXin Li          produced by versions pre-1.0.3, the decompressor must still
140*0ac9a9daSXin Li          handle lengths of up to 20. */
141*0ac9a9daSXin Li 
142*0ac9a9daSXin Li       for (i = 1; i <= alphaSize; i++) {
143*0ac9a9daSXin Li          j = weight[i] >> 8;
144*0ac9a9daSXin Li          j = 1 + (j / 2);
145*0ac9a9daSXin Li          weight[i] = j << 8;
146*0ac9a9daSXin Li       }
147*0ac9a9daSXin Li    }
148*0ac9a9daSXin Li }
149*0ac9a9daSXin Li 
150*0ac9a9daSXin Li 
151*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)152*0ac9a9daSXin Li void BZ2_hbAssignCodes ( Int32 *code,
153*0ac9a9daSXin Li                          UChar *length,
154*0ac9a9daSXin Li                          Int32 minLen,
155*0ac9a9daSXin Li                          Int32 maxLen,
156*0ac9a9daSXin Li                          Int32 alphaSize )
157*0ac9a9daSXin Li {
158*0ac9a9daSXin Li    Int32 n, vec, i;
159*0ac9a9daSXin Li 
160*0ac9a9daSXin Li    vec = 0;
161*0ac9a9daSXin Li    for (n = minLen; n <= maxLen; n++) {
162*0ac9a9daSXin Li       for (i = 0; i < alphaSize; i++)
163*0ac9a9daSXin Li          if (length[i] == n) { code[i] = vec; vec++; };
164*0ac9a9daSXin Li       vec <<= 1;
165*0ac9a9daSXin Li    }
166*0ac9a9daSXin Li }
167*0ac9a9daSXin Li 
168*0ac9a9daSXin Li 
169*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)170*0ac9a9daSXin Li void BZ2_hbCreateDecodeTables ( Int32 *limit,
171*0ac9a9daSXin Li                                 Int32 *base,
172*0ac9a9daSXin Li                                 Int32 *perm,
173*0ac9a9daSXin Li                                 UChar *length,
174*0ac9a9daSXin Li                                 Int32 minLen,
175*0ac9a9daSXin Li                                 Int32 maxLen,
176*0ac9a9daSXin Li                                 Int32 alphaSize )
177*0ac9a9daSXin Li {
178*0ac9a9daSXin Li    Int32 pp, i, j, vec;
179*0ac9a9daSXin Li 
180*0ac9a9daSXin Li    pp = 0;
181*0ac9a9daSXin Li    for (i = minLen; i <= maxLen; i++)
182*0ac9a9daSXin Li       for (j = 0; j < alphaSize; j++)
183*0ac9a9daSXin Li          if (length[j] == i) { perm[pp] = j; pp++; };
184*0ac9a9daSXin Li 
185*0ac9a9daSXin Li    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186*0ac9a9daSXin Li    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187*0ac9a9daSXin Li 
188*0ac9a9daSXin Li    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189*0ac9a9daSXin Li 
190*0ac9a9daSXin Li    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191*0ac9a9daSXin Li    vec = 0;
192*0ac9a9daSXin Li 
193*0ac9a9daSXin Li    for (i = minLen; i <= maxLen; i++) {
194*0ac9a9daSXin Li       vec += (base[i+1] - base[i]);
195*0ac9a9daSXin Li       limit[i] = vec-1;
196*0ac9a9daSXin Li       vec <<= 1;
197*0ac9a9daSXin Li    }
198*0ac9a9daSXin Li    for (i = minLen + 1; i <= maxLen; i++)
199*0ac9a9daSXin Li       base[i] = ((limit[i-1] + 1) << 1) - base[i];
200*0ac9a9daSXin Li }
201*0ac9a9daSXin Li 
202*0ac9a9daSXin Li 
203*0ac9a9daSXin Li /*-------------------------------------------------------------*/
204*0ac9a9daSXin Li /*--- end                                         huffman.c ---*/
205*0ac9a9daSXin Li /*-------------------------------------------------------------*/
206