xref: /aosp_15_r20/external/bzip2/compress.c (revision 0ac9a9daea5cce2e775d5da949508593e2ee9206)
1*0ac9a9daSXin Li 
2*0ac9a9daSXin Li /*-------------------------------------------------------------*/
3*0ac9a9daSXin Li /*--- Compression machinery (not incl block sorting)        ---*/
4*0ac9a9daSXin Li /*---                                            compress.c ---*/
5*0ac9a9daSXin Li /*-------------------------------------------------------------*/
6*0ac9a9daSXin Li 
7*0ac9a9daSXin Li /* ------------------------------------------------------------------
8*0ac9a9daSXin Li    This file is part of bzip2/libbzip2, a program and library for
9*0ac9a9daSXin Li    lossless, block-sorting data compression.
10*0ac9a9daSXin Li 
11*0ac9a9daSXin Li    bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*0ac9a9daSXin Li    Copyright (C) 1996-2019 Julian Seward <[email protected]>
13*0ac9a9daSXin Li 
14*0ac9a9daSXin Li    Please read the WARNING, DISCLAIMER and PATENTS sections in the
15*0ac9a9daSXin Li    README file.
16*0ac9a9daSXin Li 
17*0ac9a9daSXin Li    This program is released under the terms of the license contained
18*0ac9a9daSXin Li    in the file LICENSE.
19*0ac9a9daSXin Li    ------------------------------------------------------------------ */
20*0ac9a9daSXin Li 
21*0ac9a9daSXin Li 
22*0ac9a9daSXin Li /* CHANGES
23*0ac9a9daSXin Li     0.9.0    -- original version.
24*0ac9a9daSXin Li     0.9.0a/b -- no changes in this file.
25*0ac9a9daSXin Li     0.9.0c   -- changed setting of nGroups in sendMTFValues()
26*0ac9a9daSXin Li                 so as to do a bit better on small files
27*0ac9a9daSXin Li */
28*0ac9a9daSXin Li 
29*0ac9a9daSXin Li #include "bzlib_private.h"
30*0ac9a9daSXin Li 
31*0ac9a9daSXin Li 
32*0ac9a9daSXin Li /*---------------------------------------------------*/
33*0ac9a9daSXin Li /*--- Bit stream I/O                              ---*/
34*0ac9a9daSXin Li /*---------------------------------------------------*/
35*0ac9a9daSXin Li 
36*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_bsInitWrite(EState * s)37*0ac9a9daSXin Li void BZ2_bsInitWrite ( EState* s )
38*0ac9a9daSXin Li {
39*0ac9a9daSXin Li    s->bsLive = 0;
40*0ac9a9daSXin Li    s->bsBuff = 0;
41*0ac9a9daSXin Li }
42*0ac9a9daSXin Li 
43*0ac9a9daSXin Li 
44*0ac9a9daSXin Li /*---------------------------------------------------*/
45*0ac9a9daSXin Li static
bsFinishWrite(EState * s)46*0ac9a9daSXin Li void bsFinishWrite ( EState* s )
47*0ac9a9daSXin Li {
48*0ac9a9daSXin Li    while (s->bsLive > 0) {
49*0ac9a9daSXin Li       s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
50*0ac9a9daSXin Li       s->numZ++;
51*0ac9a9daSXin Li       s->bsBuff <<= 8;
52*0ac9a9daSXin Li       s->bsLive -= 8;
53*0ac9a9daSXin Li    }
54*0ac9a9daSXin Li }
55*0ac9a9daSXin Li 
56*0ac9a9daSXin Li 
57*0ac9a9daSXin Li /*---------------------------------------------------*/
58*0ac9a9daSXin Li #define bsNEEDW(nz)                           \
59*0ac9a9daSXin Li {                                             \
60*0ac9a9daSXin Li    while (s->bsLive >= 8) {                   \
61*0ac9a9daSXin Li       s->zbits[s->numZ]                       \
62*0ac9a9daSXin Li          = (UChar)(s->bsBuff >> 24);          \
63*0ac9a9daSXin Li       s->numZ++;                              \
64*0ac9a9daSXin Li       s->bsBuff <<= 8;                        \
65*0ac9a9daSXin Li       s->bsLive -= 8;                         \
66*0ac9a9daSXin Li    }                                          \
67*0ac9a9daSXin Li }
68*0ac9a9daSXin Li 
69*0ac9a9daSXin Li 
70*0ac9a9daSXin Li /*---------------------------------------------------*/
71*0ac9a9daSXin Li static
72*0ac9a9daSXin Li __inline__
bsW(EState * s,Int32 n,UInt32 v)73*0ac9a9daSXin Li void bsW ( EState* s, Int32 n, UInt32 v )
74*0ac9a9daSXin Li {
75*0ac9a9daSXin Li    bsNEEDW ( n );
76*0ac9a9daSXin Li    s->bsBuff |= (v << (32 - s->bsLive - n));
77*0ac9a9daSXin Li    s->bsLive += n;
78*0ac9a9daSXin Li }
79*0ac9a9daSXin Li 
80*0ac9a9daSXin Li 
81*0ac9a9daSXin Li /*---------------------------------------------------*/
82*0ac9a9daSXin Li static
bsPutUInt32(EState * s,UInt32 u)83*0ac9a9daSXin Li void bsPutUInt32 ( EState* s, UInt32 u )
84*0ac9a9daSXin Li {
85*0ac9a9daSXin Li    bsW ( s, 8, (u >> 24) & 0xffL );
86*0ac9a9daSXin Li    bsW ( s, 8, (u >> 16) & 0xffL );
87*0ac9a9daSXin Li    bsW ( s, 8, (u >>  8) & 0xffL );
88*0ac9a9daSXin Li    bsW ( s, 8,  u        & 0xffL );
89*0ac9a9daSXin Li }
90*0ac9a9daSXin Li 
91*0ac9a9daSXin Li 
92*0ac9a9daSXin Li /*---------------------------------------------------*/
93*0ac9a9daSXin Li static
bsPutUChar(EState * s,UChar c)94*0ac9a9daSXin Li void bsPutUChar ( EState* s, UChar c )
95*0ac9a9daSXin Li {
96*0ac9a9daSXin Li    bsW( s, 8, (UInt32)c );
97*0ac9a9daSXin Li }
98*0ac9a9daSXin Li 
99*0ac9a9daSXin Li 
100*0ac9a9daSXin Li /*---------------------------------------------------*/
101*0ac9a9daSXin Li /*--- The back end proper                         ---*/
102*0ac9a9daSXin Li /*---------------------------------------------------*/
103*0ac9a9daSXin Li 
104*0ac9a9daSXin Li /*---------------------------------------------------*/
105*0ac9a9daSXin Li static
makeMaps_e(EState * s)106*0ac9a9daSXin Li void makeMaps_e ( EState* s )
107*0ac9a9daSXin Li {
108*0ac9a9daSXin Li    Int32 i;
109*0ac9a9daSXin Li    s->nInUse = 0;
110*0ac9a9daSXin Li    for (i = 0; i < 256; i++)
111*0ac9a9daSXin Li       if (s->inUse[i]) {
112*0ac9a9daSXin Li          s->unseqToSeq[i] = s->nInUse;
113*0ac9a9daSXin Li          s->nInUse++;
114*0ac9a9daSXin Li       }
115*0ac9a9daSXin Li }
116*0ac9a9daSXin Li 
117*0ac9a9daSXin Li 
118*0ac9a9daSXin Li /*---------------------------------------------------*/
119*0ac9a9daSXin Li static
generateMTFValues(EState * s)120*0ac9a9daSXin Li void generateMTFValues ( EState* s )
121*0ac9a9daSXin Li {
122*0ac9a9daSXin Li    UChar   yy[256];
123*0ac9a9daSXin Li    Int32   i, j;
124*0ac9a9daSXin Li    Int32   zPend;
125*0ac9a9daSXin Li    Int32   wr;
126*0ac9a9daSXin Li    Int32   EOB;
127*0ac9a9daSXin Li 
128*0ac9a9daSXin Li    /*
129*0ac9a9daSXin Li       After sorting (eg, here),
130*0ac9a9daSXin Li          s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
131*0ac9a9daSXin Li          and
132*0ac9a9daSXin Li          ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
133*0ac9a9daSXin Li          holds the original block data.
134*0ac9a9daSXin Li 
135*0ac9a9daSXin Li       The first thing to do is generate the MTF values,
136*0ac9a9daSXin Li       and put them in
137*0ac9a9daSXin Li          ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
138*0ac9a9daSXin Li       Because there are strictly fewer or equal MTF values
139*0ac9a9daSXin Li       than block values, ptr values in this area are overwritten
140*0ac9a9daSXin Li       with MTF values only when they are no longer needed.
141*0ac9a9daSXin Li 
142*0ac9a9daSXin Li       The final compressed bitstream is generated into the
143*0ac9a9daSXin Li       area starting at
144*0ac9a9daSXin Li          (UChar*) (&((UChar*)s->arr2)[s->nblock])
145*0ac9a9daSXin Li 
146*0ac9a9daSXin Li       These storage aliases are set up in bzCompressInit(),
147*0ac9a9daSXin Li       except for the last one, which is arranged in
148*0ac9a9daSXin Li       compressBlock().
149*0ac9a9daSXin Li    */
150*0ac9a9daSXin Li    UInt32* ptr   = s->ptr;
151*0ac9a9daSXin Li    UChar* block  = s->block;
152*0ac9a9daSXin Li    UInt16* mtfv  = s->mtfv;
153*0ac9a9daSXin Li 
154*0ac9a9daSXin Li    makeMaps_e ( s );
155*0ac9a9daSXin Li    EOB = s->nInUse+1;
156*0ac9a9daSXin Li 
157*0ac9a9daSXin Li    for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
158*0ac9a9daSXin Li 
159*0ac9a9daSXin Li    wr = 0;
160*0ac9a9daSXin Li    zPend = 0;
161*0ac9a9daSXin Li    for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
162*0ac9a9daSXin Li 
163*0ac9a9daSXin Li    for (i = 0; i < s->nblock; i++) {
164*0ac9a9daSXin Li       UChar ll_i;
165*0ac9a9daSXin Li       AssertD ( wr <= i, "generateMTFValues(1)" );
166*0ac9a9daSXin Li       j = ptr[i]-1; if (j < 0) j += s->nblock;
167*0ac9a9daSXin Li       ll_i = s->unseqToSeq[block[j]];
168*0ac9a9daSXin Li       AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
169*0ac9a9daSXin Li 
170*0ac9a9daSXin Li       if (yy[0] == ll_i) {
171*0ac9a9daSXin Li          zPend++;
172*0ac9a9daSXin Li       } else {
173*0ac9a9daSXin Li 
174*0ac9a9daSXin Li          if (zPend > 0) {
175*0ac9a9daSXin Li             zPend--;
176*0ac9a9daSXin Li             while (True) {
177*0ac9a9daSXin Li                if (zPend & 1) {
178*0ac9a9daSXin Li                   mtfv[wr] = BZ_RUNB; wr++;
179*0ac9a9daSXin Li                   s->mtfFreq[BZ_RUNB]++;
180*0ac9a9daSXin Li                } else {
181*0ac9a9daSXin Li                   mtfv[wr] = BZ_RUNA; wr++;
182*0ac9a9daSXin Li                   s->mtfFreq[BZ_RUNA]++;
183*0ac9a9daSXin Li                }
184*0ac9a9daSXin Li                if (zPend < 2) break;
185*0ac9a9daSXin Li                zPend = (zPend - 2) / 2;
186*0ac9a9daSXin Li             };
187*0ac9a9daSXin Li             zPend = 0;
188*0ac9a9daSXin Li          }
189*0ac9a9daSXin Li          {
190*0ac9a9daSXin Li             register UChar  rtmp;
191*0ac9a9daSXin Li             register UChar* ryy_j;
192*0ac9a9daSXin Li             register UChar  rll_i;
193*0ac9a9daSXin Li             rtmp  = yy[1];
194*0ac9a9daSXin Li             yy[1] = yy[0];
195*0ac9a9daSXin Li             ryy_j = &(yy[1]);
196*0ac9a9daSXin Li             rll_i = ll_i;
197*0ac9a9daSXin Li             while ( rll_i != rtmp ) {
198*0ac9a9daSXin Li                register UChar rtmp2;
199*0ac9a9daSXin Li                ryy_j++;
200*0ac9a9daSXin Li                rtmp2  = rtmp;
201*0ac9a9daSXin Li                rtmp   = *ryy_j;
202*0ac9a9daSXin Li                *ryy_j = rtmp2;
203*0ac9a9daSXin Li             };
204*0ac9a9daSXin Li             yy[0] = rtmp;
205*0ac9a9daSXin Li             j = ryy_j - &(yy[0]);
206*0ac9a9daSXin Li             mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
207*0ac9a9daSXin Li          }
208*0ac9a9daSXin Li 
209*0ac9a9daSXin Li       }
210*0ac9a9daSXin Li    }
211*0ac9a9daSXin Li 
212*0ac9a9daSXin Li    if (zPend > 0) {
213*0ac9a9daSXin Li       zPend--;
214*0ac9a9daSXin Li       while (True) {
215*0ac9a9daSXin Li          if (zPend & 1) {
216*0ac9a9daSXin Li             mtfv[wr] = BZ_RUNB; wr++;
217*0ac9a9daSXin Li             s->mtfFreq[BZ_RUNB]++;
218*0ac9a9daSXin Li          } else {
219*0ac9a9daSXin Li             mtfv[wr] = BZ_RUNA; wr++;
220*0ac9a9daSXin Li             s->mtfFreq[BZ_RUNA]++;
221*0ac9a9daSXin Li          }
222*0ac9a9daSXin Li          if (zPend < 2) break;
223*0ac9a9daSXin Li          zPend = (zPend - 2) / 2;
224*0ac9a9daSXin Li       };
225*0ac9a9daSXin Li       zPend = 0;
226*0ac9a9daSXin Li    }
227*0ac9a9daSXin Li 
228*0ac9a9daSXin Li    mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
229*0ac9a9daSXin Li 
230*0ac9a9daSXin Li    s->nMTF = wr;
231*0ac9a9daSXin Li }
232*0ac9a9daSXin Li 
233*0ac9a9daSXin Li 
234*0ac9a9daSXin Li /*---------------------------------------------------*/
235*0ac9a9daSXin Li #define BZ_LESSER_ICOST  0
236*0ac9a9daSXin Li #define BZ_GREATER_ICOST 15
237*0ac9a9daSXin Li 
238*0ac9a9daSXin Li static
sendMTFValues(EState * s)239*0ac9a9daSXin Li void sendMTFValues ( EState* s )
240*0ac9a9daSXin Li {
241*0ac9a9daSXin Li    Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
242*0ac9a9daSXin Li    Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
243*0ac9a9daSXin Li    Int32 nGroups, nBytes;
244*0ac9a9daSXin Li 
245*0ac9a9daSXin Li    /*--
246*0ac9a9daSXin Li    UChar  len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
247*0ac9a9daSXin Li    is a global since the decoder also needs it.
248*0ac9a9daSXin Li 
249*0ac9a9daSXin Li    Int32  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
250*0ac9a9daSXin Li    Int32  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
251*0ac9a9daSXin Li    are also globals only used in this proc.
252*0ac9a9daSXin Li    Made global to keep stack frame size small.
253*0ac9a9daSXin Li    --*/
254*0ac9a9daSXin Li 
255*0ac9a9daSXin Li 
256*0ac9a9daSXin Li    UInt16 cost[BZ_N_GROUPS];
257*0ac9a9daSXin Li    Int32  fave[BZ_N_GROUPS];
258*0ac9a9daSXin Li 
259*0ac9a9daSXin Li    UInt16* mtfv = s->mtfv;
260*0ac9a9daSXin Li 
261*0ac9a9daSXin Li    if (s->verbosity >= 3)
262*0ac9a9daSXin Li       VPrintf3( "      %d in block, %d after MTF & 1-2 coding, "
263*0ac9a9daSXin Li                 "%d+2 syms in use\n",
264*0ac9a9daSXin Li                 s->nblock, s->nMTF, s->nInUse );
265*0ac9a9daSXin Li 
266*0ac9a9daSXin Li    alphaSize = s->nInUse+2;
267*0ac9a9daSXin Li    for (t = 0; t < BZ_N_GROUPS; t++)
268*0ac9a9daSXin Li       for (v = 0; v < alphaSize; v++)
269*0ac9a9daSXin Li          s->len[t][v] = BZ_GREATER_ICOST;
270*0ac9a9daSXin Li 
271*0ac9a9daSXin Li    /*--- Decide how many coding tables to use ---*/
272*0ac9a9daSXin Li    AssertH ( s->nMTF > 0, 3001 );
273*0ac9a9daSXin Li    if (s->nMTF < 200)  nGroups = 2; else
274*0ac9a9daSXin Li    if (s->nMTF < 600)  nGroups = 3; else
275*0ac9a9daSXin Li    if (s->nMTF < 1200) nGroups = 4; else
276*0ac9a9daSXin Li    if (s->nMTF < 2400) nGroups = 5; else
277*0ac9a9daSXin Li                        nGroups = 6;
278*0ac9a9daSXin Li 
279*0ac9a9daSXin Li    /*--- Generate an initial set of coding tables ---*/
280*0ac9a9daSXin Li    {
281*0ac9a9daSXin Li       Int32 nPart, remF, tFreq, aFreq;
282*0ac9a9daSXin Li 
283*0ac9a9daSXin Li       nPart = nGroups;
284*0ac9a9daSXin Li       remF  = s->nMTF;
285*0ac9a9daSXin Li       gs = 0;
286*0ac9a9daSXin Li       while (nPart > 0) {
287*0ac9a9daSXin Li          tFreq = remF / nPart;
288*0ac9a9daSXin Li          ge = gs-1;
289*0ac9a9daSXin Li          aFreq = 0;
290*0ac9a9daSXin Li          while (aFreq < tFreq && ge < alphaSize-1) {
291*0ac9a9daSXin Li             ge++;
292*0ac9a9daSXin Li             aFreq += s->mtfFreq[ge];
293*0ac9a9daSXin Li          }
294*0ac9a9daSXin Li 
295*0ac9a9daSXin Li          if (ge > gs
296*0ac9a9daSXin Li              && nPart != nGroups && nPart != 1
297*0ac9a9daSXin Li              && ((nGroups-nPart) % 2 == 1)) {
298*0ac9a9daSXin Li             aFreq -= s->mtfFreq[ge];
299*0ac9a9daSXin Li             ge--;
300*0ac9a9daSXin Li          }
301*0ac9a9daSXin Li 
302*0ac9a9daSXin Li          if (s->verbosity >= 3)
303*0ac9a9daSXin Li             VPrintf5( "      initial group %d, [%d .. %d], "
304*0ac9a9daSXin Li                       "has %d syms (%4.1f%%)\n",
305*0ac9a9daSXin Li                       nPart, gs, ge, aFreq,
306*0ac9a9daSXin Li                       (100.0 * (float)aFreq) / (float)(s->nMTF) );
307*0ac9a9daSXin Li 
308*0ac9a9daSXin Li          for (v = 0; v < alphaSize; v++)
309*0ac9a9daSXin Li             if (v >= gs && v <= ge)
310*0ac9a9daSXin Li                s->len[nPart-1][v] = BZ_LESSER_ICOST; else
311*0ac9a9daSXin Li                s->len[nPart-1][v] = BZ_GREATER_ICOST;
312*0ac9a9daSXin Li 
313*0ac9a9daSXin Li          nPart--;
314*0ac9a9daSXin Li          gs = ge+1;
315*0ac9a9daSXin Li          remF -= aFreq;
316*0ac9a9daSXin Li       }
317*0ac9a9daSXin Li    }
318*0ac9a9daSXin Li 
319*0ac9a9daSXin Li    /*---
320*0ac9a9daSXin Li       Iterate up to BZ_N_ITERS times to improve the tables.
321*0ac9a9daSXin Li    ---*/
322*0ac9a9daSXin Li    for (iter = 0; iter < BZ_N_ITERS; iter++) {
323*0ac9a9daSXin Li 
324*0ac9a9daSXin Li       for (t = 0; t < nGroups; t++) fave[t] = 0;
325*0ac9a9daSXin Li 
326*0ac9a9daSXin Li       for (t = 0; t < nGroups; t++)
327*0ac9a9daSXin Li          for (v = 0; v < alphaSize; v++)
328*0ac9a9daSXin Li             s->rfreq[t][v] = 0;
329*0ac9a9daSXin Li 
330*0ac9a9daSXin Li       /*---
331*0ac9a9daSXin Li         Set up an auxiliary length table which is used to fast-track
332*0ac9a9daSXin Li 	the common case (nGroups == 6).
333*0ac9a9daSXin Li       ---*/
334*0ac9a9daSXin Li       if (nGroups == 6) {
335*0ac9a9daSXin Li          for (v = 0; v < alphaSize; v++) {
336*0ac9a9daSXin Li             s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
337*0ac9a9daSXin Li             s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
338*0ac9a9daSXin Li             s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
339*0ac9a9daSXin Li 	 }
340*0ac9a9daSXin Li       }
341*0ac9a9daSXin Li 
342*0ac9a9daSXin Li       nSelectors = 0;
343*0ac9a9daSXin Li       totc = 0;
344*0ac9a9daSXin Li       gs = 0;
345*0ac9a9daSXin Li       while (True) {
346*0ac9a9daSXin Li 
347*0ac9a9daSXin Li          /*--- Set group start & end marks. --*/
348*0ac9a9daSXin Li          if (gs >= s->nMTF) break;
349*0ac9a9daSXin Li          ge = gs + BZ_G_SIZE - 1;
350*0ac9a9daSXin Li          if (ge >= s->nMTF) ge = s->nMTF-1;
351*0ac9a9daSXin Li 
352*0ac9a9daSXin Li          /*--
353*0ac9a9daSXin Li             Calculate the cost of this group as coded
354*0ac9a9daSXin Li             by each of the coding tables.
355*0ac9a9daSXin Li          --*/
356*0ac9a9daSXin Li          for (t = 0; t < nGroups; t++) cost[t] = 0;
357*0ac9a9daSXin Li 
358*0ac9a9daSXin Li          if (nGroups == 6 && 50 == ge-gs+1) {
359*0ac9a9daSXin Li             /*--- fast track the common case ---*/
360*0ac9a9daSXin Li             register UInt32 cost01, cost23, cost45;
361*0ac9a9daSXin Li             register UInt16 icv;
362*0ac9a9daSXin Li             cost01 = cost23 = cost45 = 0;
363*0ac9a9daSXin Li 
364*0ac9a9daSXin Li #           define BZ_ITER(nn)                \
365*0ac9a9daSXin Li                icv = mtfv[gs+(nn)];           \
366*0ac9a9daSXin Li                cost01 += s->len_pack[icv][0]; \
367*0ac9a9daSXin Li                cost23 += s->len_pack[icv][1]; \
368*0ac9a9daSXin Li                cost45 += s->len_pack[icv][2]; \
369*0ac9a9daSXin Li 
370*0ac9a9daSXin Li             BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
371*0ac9a9daSXin Li             BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
372*0ac9a9daSXin Li             BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
373*0ac9a9daSXin Li             BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
374*0ac9a9daSXin Li             BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
375*0ac9a9daSXin Li             BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
376*0ac9a9daSXin Li             BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
377*0ac9a9daSXin Li             BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
378*0ac9a9daSXin Li             BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
379*0ac9a9daSXin Li             BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
380*0ac9a9daSXin Li 
381*0ac9a9daSXin Li #           undef BZ_ITER
382*0ac9a9daSXin Li 
383*0ac9a9daSXin Li             cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
384*0ac9a9daSXin Li             cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
385*0ac9a9daSXin Li             cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
386*0ac9a9daSXin Li 
387*0ac9a9daSXin Li          } else {
388*0ac9a9daSXin Li 	    /*--- slow version which correctly handles all situations ---*/
389*0ac9a9daSXin Li             for (i = gs; i <= ge; i++) {
390*0ac9a9daSXin Li                UInt16 icv = mtfv[i];
391*0ac9a9daSXin Li                for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
392*0ac9a9daSXin Li             }
393*0ac9a9daSXin Li          }
394*0ac9a9daSXin Li 
395*0ac9a9daSXin Li          /*--
396*0ac9a9daSXin Li             Find the coding table which is best for this group,
397*0ac9a9daSXin Li             and record its identity in the selector table.
398*0ac9a9daSXin Li          --*/
399*0ac9a9daSXin Li          bc = 999999999; bt = -1;
400*0ac9a9daSXin Li          for (t = 0; t < nGroups; t++)
401*0ac9a9daSXin Li             if (cost[t] < bc) { bc = cost[t]; bt = t; };
402*0ac9a9daSXin Li          totc += bc;
403*0ac9a9daSXin Li          fave[bt]++;
404*0ac9a9daSXin Li          s->selector[nSelectors] = bt;
405*0ac9a9daSXin Li          nSelectors++;
406*0ac9a9daSXin Li 
407*0ac9a9daSXin Li          /*--
408*0ac9a9daSXin Li             Increment the symbol frequencies for the selected table.
409*0ac9a9daSXin Li           --*/
410*0ac9a9daSXin Li          if (nGroups == 6 && 50 == ge-gs+1) {
411*0ac9a9daSXin Li             /*--- fast track the common case ---*/
412*0ac9a9daSXin Li 
413*0ac9a9daSXin Li #           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
414*0ac9a9daSXin Li 
415*0ac9a9daSXin Li             BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
416*0ac9a9daSXin Li             BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
417*0ac9a9daSXin Li             BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
418*0ac9a9daSXin Li             BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
419*0ac9a9daSXin Li             BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
420*0ac9a9daSXin Li             BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
421*0ac9a9daSXin Li             BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
422*0ac9a9daSXin Li             BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
423*0ac9a9daSXin Li             BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
424*0ac9a9daSXin Li             BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
425*0ac9a9daSXin Li 
426*0ac9a9daSXin Li #           undef BZ_ITUR
427*0ac9a9daSXin Li 
428*0ac9a9daSXin Li          } else {
429*0ac9a9daSXin Li 	    /*--- slow version which correctly handles all situations ---*/
430*0ac9a9daSXin Li             for (i = gs; i <= ge; i++)
431*0ac9a9daSXin Li                s->rfreq[bt][ mtfv[i] ]++;
432*0ac9a9daSXin Li          }
433*0ac9a9daSXin Li 
434*0ac9a9daSXin Li          gs = ge+1;
435*0ac9a9daSXin Li       }
436*0ac9a9daSXin Li       if (s->verbosity >= 3) {
437*0ac9a9daSXin Li          VPrintf2 ( "      pass %d: size is %d, grp uses are ",
438*0ac9a9daSXin Li                    iter+1, totc/8 );
439*0ac9a9daSXin Li          for (t = 0; t < nGroups; t++)
440*0ac9a9daSXin Li             VPrintf1 ( "%d ", fave[t] );
441*0ac9a9daSXin Li          VPrintf0 ( "\n" );
442*0ac9a9daSXin Li       }
443*0ac9a9daSXin Li 
444*0ac9a9daSXin Li       /*--
445*0ac9a9daSXin Li         Recompute the tables based on the accumulated frequencies.
446*0ac9a9daSXin Li       --*/
447*0ac9a9daSXin Li       /* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See
448*0ac9a9daSXin Li          comment in huffman.c for details. */
449*0ac9a9daSXin Li       for (t = 0; t < nGroups; t++)
450*0ac9a9daSXin Li          BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
451*0ac9a9daSXin Li                                  alphaSize, 17 /*20*/ );
452*0ac9a9daSXin Li    }
453*0ac9a9daSXin Li 
454*0ac9a9daSXin Li 
455*0ac9a9daSXin Li    AssertH( nGroups < 8, 3002 );
456*0ac9a9daSXin Li    AssertH( nSelectors < 32768 &&
457*0ac9a9daSXin Li             nSelectors <= BZ_MAX_SELECTORS,
458*0ac9a9daSXin Li             3003 );
459*0ac9a9daSXin Li 
460*0ac9a9daSXin Li 
461*0ac9a9daSXin Li    /*--- Compute MTF values for the selectors. ---*/
462*0ac9a9daSXin Li    {
463*0ac9a9daSXin Li       UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
464*0ac9a9daSXin Li       for (i = 0; i < nGroups; i++) pos[i] = i;
465*0ac9a9daSXin Li       for (i = 0; i < nSelectors; i++) {
466*0ac9a9daSXin Li          ll_i = s->selector[i];
467*0ac9a9daSXin Li          j = 0;
468*0ac9a9daSXin Li          tmp = pos[j];
469*0ac9a9daSXin Li          while ( ll_i != tmp ) {
470*0ac9a9daSXin Li             j++;
471*0ac9a9daSXin Li             tmp2 = tmp;
472*0ac9a9daSXin Li             tmp = pos[j];
473*0ac9a9daSXin Li             pos[j] = tmp2;
474*0ac9a9daSXin Li          };
475*0ac9a9daSXin Li          pos[0] = tmp;
476*0ac9a9daSXin Li          s->selectorMtf[i] = j;
477*0ac9a9daSXin Li       }
478*0ac9a9daSXin Li    };
479*0ac9a9daSXin Li 
480*0ac9a9daSXin Li    /*--- Assign actual codes for the tables. --*/
481*0ac9a9daSXin Li    for (t = 0; t < nGroups; t++) {
482*0ac9a9daSXin Li       minLen = 32;
483*0ac9a9daSXin Li       maxLen = 0;
484*0ac9a9daSXin Li       for (i = 0; i < alphaSize; i++) {
485*0ac9a9daSXin Li          if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
486*0ac9a9daSXin Li          if (s->len[t][i] < minLen) minLen = s->len[t][i];
487*0ac9a9daSXin Li       }
488*0ac9a9daSXin Li       AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
489*0ac9a9daSXin Li       AssertH ( !(minLen < 1),  3005 );
490*0ac9a9daSXin Li       BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
491*0ac9a9daSXin Li                           minLen, maxLen, alphaSize );
492*0ac9a9daSXin Li    }
493*0ac9a9daSXin Li 
494*0ac9a9daSXin Li    /*--- Transmit the mapping table. ---*/
495*0ac9a9daSXin Li    {
496*0ac9a9daSXin Li       Bool inUse16[16];
497*0ac9a9daSXin Li       for (i = 0; i < 16; i++) {
498*0ac9a9daSXin Li           inUse16[i] = False;
499*0ac9a9daSXin Li           for (j = 0; j < 16; j++)
500*0ac9a9daSXin Li              if (s->inUse[i * 16 + j]) inUse16[i] = True;
501*0ac9a9daSXin Li       }
502*0ac9a9daSXin Li 
503*0ac9a9daSXin Li       nBytes = s->numZ;
504*0ac9a9daSXin Li       for (i = 0; i < 16; i++)
505*0ac9a9daSXin Li          if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
506*0ac9a9daSXin Li 
507*0ac9a9daSXin Li       for (i = 0; i < 16; i++)
508*0ac9a9daSXin Li          if (inUse16[i])
509*0ac9a9daSXin Li             for (j = 0; j < 16; j++) {
510*0ac9a9daSXin Li                if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
511*0ac9a9daSXin Li             }
512*0ac9a9daSXin Li 
513*0ac9a9daSXin Li       if (s->verbosity >= 3)
514*0ac9a9daSXin Li          VPrintf1( "      bytes: mapping %d, ", s->numZ-nBytes );
515*0ac9a9daSXin Li    }
516*0ac9a9daSXin Li 
517*0ac9a9daSXin Li    /*--- Now the selectors. ---*/
518*0ac9a9daSXin Li    nBytes = s->numZ;
519*0ac9a9daSXin Li    bsW ( s, 3, nGroups );
520*0ac9a9daSXin Li    bsW ( s, 15, nSelectors );
521*0ac9a9daSXin Li    for (i = 0; i < nSelectors; i++) {
522*0ac9a9daSXin Li       for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
523*0ac9a9daSXin Li       bsW(s,1,0);
524*0ac9a9daSXin Li    }
525*0ac9a9daSXin Li    if (s->verbosity >= 3)
526*0ac9a9daSXin Li       VPrintf1( "selectors %d, ", s->numZ-nBytes );
527*0ac9a9daSXin Li 
528*0ac9a9daSXin Li    /*--- Now the coding tables. ---*/
529*0ac9a9daSXin Li    nBytes = s->numZ;
530*0ac9a9daSXin Li 
531*0ac9a9daSXin Li    for (t = 0; t < nGroups; t++) {
532*0ac9a9daSXin Li       Int32 curr = s->len[t][0];
533*0ac9a9daSXin Li       bsW ( s, 5, curr );
534*0ac9a9daSXin Li       for (i = 0; i < alphaSize; i++) {
535*0ac9a9daSXin Li          while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
536*0ac9a9daSXin Li          while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
537*0ac9a9daSXin Li          bsW ( s, 1, 0 );
538*0ac9a9daSXin Li       }
539*0ac9a9daSXin Li    }
540*0ac9a9daSXin Li 
541*0ac9a9daSXin Li    if (s->verbosity >= 3)
542*0ac9a9daSXin Li       VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
543*0ac9a9daSXin Li 
544*0ac9a9daSXin Li    /*--- And finally, the block data proper ---*/
545*0ac9a9daSXin Li    nBytes = s->numZ;
546*0ac9a9daSXin Li    selCtr = 0;
547*0ac9a9daSXin Li    gs = 0;
548*0ac9a9daSXin Li    while (True) {
549*0ac9a9daSXin Li       if (gs >= s->nMTF) break;
550*0ac9a9daSXin Li       ge = gs + BZ_G_SIZE - 1;
551*0ac9a9daSXin Li       if (ge >= s->nMTF) ge = s->nMTF-1;
552*0ac9a9daSXin Li       AssertH ( s->selector[selCtr] < nGroups, 3006 );
553*0ac9a9daSXin Li 
554*0ac9a9daSXin Li       if (nGroups == 6 && 50 == ge-gs+1) {
555*0ac9a9daSXin Li             /*--- fast track the common case ---*/
556*0ac9a9daSXin Li             UInt16 mtfv_i;
557*0ac9a9daSXin Li             UChar* s_len_sel_selCtr
558*0ac9a9daSXin Li                = &(s->len[s->selector[selCtr]][0]);
559*0ac9a9daSXin Li             Int32* s_code_sel_selCtr
560*0ac9a9daSXin Li                = &(s->code[s->selector[selCtr]][0]);
561*0ac9a9daSXin Li 
562*0ac9a9daSXin Li #           define BZ_ITAH(nn)                      \
563*0ac9a9daSXin Li                mtfv_i = mtfv[gs+(nn)];              \
564*0ac9a9daSXin Li                bsW ( s,                             \
565*0ac9a9daSXin Li                      s_len_sel_selCtr[mtfv_i],      \
566*0ac9a9daSXin Li                      s_code_sel_selCtr[mtfv_i] )
567*0ac9a9daSXin Li 
568*0ac9a9daSXin Li             BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
569*0ac9a9daSXin Li             BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
570*0ac9a9daSXin Li             BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
571*0ac9a9daSXin Li             BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
572*0ac9a9daSXin Li             BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
573*0ac9a9daSXin Li             BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
574*0ac9a9daSXin Li             BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
575*0ac9a9daSXin Li             BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
576*0ac9a9daSXin Li             BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
577*0ac9a9daSXin Li             BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
578*0ac9a9daSXin Li 
579*0ac9a9daSXin Li #           undef BZ_ITAH
580*0ac9a9daSXin Li 
581*0ac9a9daSXin Li       } else {
582*0ac9a9daSXin Li 	 /*--- slow version which correctly handles all situations ---*/
583*0ac9a9daSXin Li          for (i = gs; i <= ge; i++) {
584*0ac9a9daSXin Li             bsW ( s,
585*0ac9a9daSXin Li                   s->len  [s->selector[selCtr]] [mtfv[i]],
586*0ac9a9daSXin Li                   s->code [s->selector[selCtr]] [mtfv[i]] );
587*0ac9a9daSXin Li          }
588*0ac9a9daSXin Li       }
589*0ac9a9daSXin Li 
590*0ac9a9daSXin Li 
591*0ac9a9daSXin Li       gs = ge+1;
592*0ac9a9daSXin Li       selCtr++;
593*0ac9a9daSXin Li    }
594*0ac9a9daSXin Li    AssertH( selCtr == nSelectors, 3007 );
595*0ac9a9daSXin Li 
596*0ac9a9daSXin Li    if (s->verbosity >= 3)
597*0ac9a9daSXin Li       VPrintf1( "codes %d\n", s->numZ-nBytes );
598*0ac9a9daSXin Li }
599*0ac9a9daSXin Li 
600*0ac9a9daSXin Li 
601*0ac9a9daSXin Li /*---------------------------------------------------*/
BZ2_compressBlock(EState * s,Bool is_last_block)602*0ac9a9daSXin Li void BZ2_compressBlock ( EState* s, Bool is_last_block )
603*0ac9a9daSXin Li {
604*0ac9a9daSXin Li    if (s->nblock > 0) {
605*0ac9a9daSXin Li 
606*0ac9a9daSXin Li       BZ_FINALISE_CRC ( s->blockCRC );
607*0ac9a9daSXin Li       s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
608*0ac9a9daSXin Li       s->combinedCRC ^= s->blockCRC;
609*0ac9a9daSXin Li       if (s->blockNo > 1) s->numZ = 0;
610*0ac9a9daSXin Li 
611*0ac9a9daSXin Li       if (s->verbosity >= 2)
612*0ac9a9daSXin Li          VPrintf4( "    block %d: crc = 0x%08x, "
613*0ac9a9daSXin Li                    "combined CRC = 0x%08x, size = %d\n",
614*0ac9a9daSXin Li                    s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
615*0ac9a9daSXin Li 
616*0ac9a9daSXin Li       BZ2_blockSort ( s );
617*0ac9a9daSXin Li    }
618*0ac9a9daSXin Li 
619*0ac9a9daSXin Li    s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
620*0ac9a9daSXin Li 
621*0ac9a9daSXin Li    /*-- If this is the first block, create the stream header. --*/
622*0ac9a9daSXin Li    if (s->blockNo == 1) {
623*0ac9a9daSXin Li       BZ2_bsInitWrite ( s );
624*0ac9a9daSXin Li       bsPutUChar ( s, BZ_HDR_B );
625*0ac9a9daSXin Li       bsPutUChar ( s, BZ_HDR_Z );
626*0ac9a9daSXin Li       bsPutUChar ( s, BZ_HDR_h );
627*0ac9a9daSXin Li       bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
628*0ac9a9daSXin Li    }
629*0ac9a9daSXin Li 
630*0ac9a9daSXin Li    if (s->nblock > 0) {
631*0ac9a9daSXin Li 
632*0ac9a9daSXin Li       bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
633*0ac9a9daSXin Li       bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
634*0ac9a9daSXin Li       bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
635*0ac9a9daSXin Li 
636*0ac9a9daSXin Li       /*-- Now the block's CRC, so it is in a known place. --*/
637*0ac9a9daSXin Li       bsPutUInt32 ( s, s->blockCRC );
638*0ac9a9daSXin Li 
639*0ac9a9daSXin Li       /*--
640*0ac9a9daSXin Li          Now a single bit indicating (non-)randomisation.
641*0ac9a9daSXin Li          As of version 0.9.5, we use a better sorting algorithm
642*0ac9a9daSXin Li          which makes randomisation unnecessary.  So always set
643*0ac9a9daSXin Li          the randomised bit to 'no'.  Of course, the decoder
644*0ac9a9daSXin Li          still needs to be able to handle randomised blocks
645*0ac9a9daSXin Li          so as to maintain backwards compatibility with
646*0ac9a9daSXin Li          older versions of bzip2.
647*0ac9a9daSXin Li       --*/
648*0ac9a9daSXin Li       bsW(s,1,0);
649*0ac9a9daSXin Li 
650*0ac9a9daSXin Li       bsW ( s, 24, s->origPtr );
651*0ac9a9daSXin Li       generateMTFValues ( s );
652*0ac9a9daSXin Li       sendMTFValues ( s );
653*0ac9a9daSXin Li    }
654*0ac9a9daSXin Li 
655*0ac9a9daSXin Li 
656*0ac9a9daSXin Li    /*-- If this is the last block, add the stream trailer. --*/
657*0ac9a9daSXin Li    if (is_last_block) {
658*0ac9a9daSXin Li 
659*0ac9a9daSXin Li       bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
660*0ac9a9daSXin Li       bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
661*0ac9a9daSXin Li       bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
662*0ac9a9daSXin Li       bsPutUInt32 ( s, s->combinedCRC );
663*0ac9a9daSXin Li       if (s->verbosity >= 2)
664*0ac9a9daSXin Li          VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
665*0ac9a9daSXin Li       bsFinishWrite ( s );
666*0ac9a9daSXin Li    }
667*0ac9a9daSXin Li }
668*0ac9a9daSXin Li 
669*0ac9a9daSXin Li 
670*0ac9a9daSXin Li /*-------------------------------------------------------------*/
671*0ac9a9daSXin Li /*--- end                                        compress.c ---*/
672*0ac9a9daSXin Li /*-------------------------------------------------------------*/
673