1*0ac9a9daSXin Li
2*0ac9a9daSXin Li /*-------------------------------------------------------------*/
3*0ac9a9daSXin Li /*--- Block sorting machinery ---*/
4*0ac9a9daSXin Li /*--- blocksort.c ---*/
5*0ac9a9daSXin Li /*-------------------------------------------------------------*/
6*0ac9a9daSXin Li
7*0ac9a9daSXin Li /* ------------------------------------------------------------------
8*0ac9a9daSXin Li This file is part of bzip2/libbzip2, a program and library for
9*0ac9a9daSXin Li lossless, block-sorting data compression.
10*0ac9a9daSXin Li
11*0ac9a9daSXin Li bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*0ac9a9daSXin Li Copyright (C) 1996-2019 Julian Seward <[email protected]>
13*0ac9a9daSXin Li
14*0ac9a9daSXin Li Please read the WARNING, DISCLAIMER and PATENTS sections in the
15*0ac9a9daSXin Li README file.
16*0ac9a9daSXin Li
17*0ac9a9daSXin Li This program is released under the terms of the license contained
18*0ac9a9daSXin Li in the file LICENSE.
19*0ac9a9daSXin Li ------------------------------------------------------------------ */
20*0ac9a9daSXin Li
21*0ac9a9daSXin Li
22*0ac9a9daSXin Li #include "bzlib_private.h"
23*0ac9a9daSXin Li
24*0ac9a9daSXin Li /*---------------------------------------------*/
25*0ac9a9daSXin Li /*--- Fallback O(N log(N)^2) sorting ---*/
26*0ac9a9daSXin Li /*--- algorithm, for repetitive blocks ---*/
27*0ac9a9daSXin Li /*---------------------------------------------*/
28*0ac9a9daSXin Li
29*0ac9a9daSXin Li /*---------------------------------------------*/
30*0ac9a9daSXin Li static
31*0ac9a9daSXin Li __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)32*0ac9a9daSXin Li void fallbackSimpleSort ( UInt32* fmap,
33*0ac9a9daSXin Li UInt32* eclass,
34*0ac9a9daSXin Li Int32 lo,
35*0ac9a9daSXin Li Int32 hi )
36*0ac9a9daSXin Li {
37*0ac9a9daSXin Li Int32 i, j, tmp;
38*0ac9a9daSXin Li UInt32 ec_tmp;
39*0ac9a9daSXin Li
40*0ac9a9daSXin Li if (lo == hi) return;
41*0ac9a9daSXin Li
42*0ac9a9daSXin Li if (hi - lo > 3) {
43*0ac9a9daSXin Li for ( i = hi-4; i >= lo; i-- ) {
44*0ac9a9daSXin Li tmp = fmap[i];
45*0ac9a9daSXin Li ec_tmp = eclass[tmp];
46*0ac9a9daSXin Li for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47*0ac9a9daSXin Li fmap[j-4] = fmap[j];
48*0ac9a9daSXin Li fmap[j-4] = tmp;
49*0ac9a9daSXin Li }
50*0ac9a9daSXin Li }
51*0ac9a9daSXin Li
52*0ac9a9daSXin Li for ( i = hi-1; i >= lo; i-- ) {
53*0ac9a9daSXin Li tmp = fmap[i];
54*0ac9a9daSXin Li ec_tmp = eclass[tmp];
55*0ac9a9daSXin Li for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56*0ac9a9daSXin Li fmap[j-1] = fmap[j];
57*0ac9a9daSXin Li fmap[j-1] = tmp;
58*0ac9a9daSXin Li }
59*0ac9a9daSXin Li }
60*0ac9a9daSXin Li
61*0ac9a9daSXin Li
62*0ac9a9daSXin Li /*---------------------------------------------*/
63*0ac9a9daSXin Li #define fswap(zz1, zz2) \
64*0ac9a9daSXin Li { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65*0ac9a9daSXin Li
66*0ac9a9daSXin Li #define fvswap(zzp1, zzp2, zzn) \
67*0ac9a9daSXin Li { \
68*0ac9a9daSXin Li Int32 yyp1 = (zzp1); \
69*0ac9a9daSXin Li Int32 yyp2 = (zzp2); \
70*0ac9a9daSXin Li Int32 yyn = (zzn); \
71*0ac9a9daSXin Li while (yyn > 0) { \
72*0ac9a9daSXin Li fswap(fmap[yyp1], fmap[yyp2]); \
73*0ac9a9daSXin Li yyp1++; yyp2++; yyn--; \
74*0ac9a9daSXin Li } \
75*0ac9a9daSXin Li }
76*0ac9a9daSXin Li
77*0ac9a9daSXin Li
78*0ac9a9daSXin Li #define fmin(a,b) ((a) < (b)) ? (a) : (b)
79*0ac9a9daSXin Li
80*0ac9a9daSXin Li #define fpush(lz,hz) { stackLo[sp] = lz; \
81*0ac9a9daSXin Li stackHi[sp] = hz; \
82*0ac9a9daSXin Li sp++; }
83*0ac9a9daSXin Li
84*0ac9a9daSXin Li #define fpop(lz,hz) { sp--; \
85*0ac9a9daSXin Li lz = stackLo[sp]; \
86*0ac9a9daSXin Li hz = stackHi[sp]; }
87*0ac9a9daSXin Li
88*0ac9a9daSXin Li #define FALLBACK_QSORT_SMALL_THRESH 10
89*0ac9a9daSXin Li #define FALLBACK_QSORT_STACK_SIZE 100
90*0ac9a9daSXin Li
91*0ac9a9daSXin Li
92*0ac9a9daSXin Li static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)93*0ac9a9daSXin Li void fallbackQSort3 ( UInt32* fmap,
94*0ac9a9daSXin Li UInt32* eclass,
95*0ac9a9daSXin Li Int32 loSt,
96*0ac9a9daSXin Li Int32 hiSt )
97*0ac9a9daSXin Li {
98*0ac9a9daSXin Li Int32 unLo, unHi, ltLo, gtHi, n, m;
99*0ac9a9daSXin Li Int32 sp, lo, hi;
100*0ac9a9daSXin Li UInt32 med, r, r3;
101*0ac9a9daSXin Li Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102*0ac9a9daSXin Li Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103*0ac9a9daSXin Li
104*0ac9a9daSXin Li r = 0;
105*0ac9a9daSXin Li
106*0ac9a9daSXin Li sp = 0;
107*0ac9a9daSXin Li fpush ( loSt, hiSt );
108*0ac9a9daSXin Li
109*0ac9a9daSXin Li while (sp > 0) {
110*0ac9a9daSXin Li
111*0ac9a9daSXin Li AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112*0ac9a9daSXin Li
113*0ac9a9daSXin Li fpop ( lo, hi );
114*0ac9a9daSXin Li if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115*0ac9a9daSXin Li fallbackSimpleSort ( fmap, eclass, lo, hi );
116*0ac9a9daSXin Li continue;
117*0ac9a9daSXin Li }
118*0ac9a9daSXin Li
119*0ac9a9daSXin Li /* Random partitioning. Median of 3 sometimes fails to
120*0ac9a9daSXin Li avoid bad cases. Median of 9 seems to help but
121*0ac9a9daSXin Li looks rather expensive. This too seems to work but
122*0ac9a9daSXin Li is cheaper. Guidance for the magic constants
123*0ac9a9daSXin Li 7621 and 32768 is taken from Sedgewick's algorithms
124*0ac9a9daSXin Li book, chapter 35.
125*0ac9a9daSXin Li */
126*0ac9a9daSXin Li r = ((r * 7621) + 1) % 32768;
127*0ac9a9daSXin Li r3 = r % 3;
128*0ac9a9daSXin Li if (r3 == 0) med = eclass[fmap[lo]]; else
129*0ac9a9daSXin Li if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130*0ac9a9daSXin Li med = eclass[fmap[hi]];
131*0ac9a9daSXin Li
132*0ac9a9daSXin Li unLo = ltLo = lo;
133*0ac9a9daSXin Li unHi = gtHi = hi;
134*0ac9a9daSXin Li
135*0ac9a9daSXin Li while (1) {
136*0ac9a9daSXin Li while (1) {
137*0ac9a9daSXin Li if (unLo > unHi) break;
138*0ac9a9daSXin Li n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139*0ac9a9daSXin Li if (n == 0) {
140*0ac9a9daSXin Li fswap(fmap[unLo], fmap[ltLo]);
141*0ac9a9daSXin Li ltLo++; unLo++;
142*0ac9a9daSXin Li continue;
143*0ac9a9daSXin Li };
144*0ac9a9daSXin Li if (n > 0) break;
145*0ac9a9daSXin Li unLo++;
146*0ac9a9daSXin Li }
147*0ac9a9daSXin Li while (1) {
148*0ac9a9daSXin Li if (unLo > unHi) break;
149*0ac9a9daSXin Li n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150*0ac9a9daSXin Li if (n == 0) {
151*0ac9a9daSXin Li fswap(fmap[unHi], fmap[gtHi]);
152*0ac9a9daSXin Li gtHi--; unHi--;
153*0ac9a9daSXin Li continue;
154*0ac9a9daSXin Li };
155*0ac9a9daSXin Li if (n < 0) break;
156*0ac9a9daSXin Li unHi--;
157*0ac9a9daSXin Li }
158*0ac9a9daSXin Li if (unLo > unHi) break;
159*0ac9a9daSXin Li fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160*0ac9a9daSXin Li }
161*0ac9a9daSXin Li
162*0ac9a9daSXin Li AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163*0ac9a9daSXin Li
164*0ac9a9daSXin Li if (gtHi < ltLo) continue;
165*0ac9a9daSXin Li
166*0ac9a9daSXin Li n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167*0ac9a9daSXin Li m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168*0ac9a9daSXin Li
169*0ac9a9daSXin Li n = lo + unLo - ltLo - 1;
170*0ac9a9daSXin Li m = hi - (gtHi - unHi) + 1;
171*0ac9a9daSXin Li
172*0ac9a9daSXin Li if (n - lo > hi - m) {
173*0ac9a9daSXin Li fpush ( lo, n );
174*0ac9a9daSXin Li fpush ( m, hi );
175*0ac9a9daSXin Li } else {
176*0ac9a9daSXin Li fpush ( m, hi );
177*0ac9a9daSXin Li fpush ( lo, n );
178*0ac9a9daSXin Li }
179*0ac9a9daSXin Li }
180*0ac9a9daSXin Li }
181*0ac9a9daSXin Li
182*0ac9a9daSXin Li #undef fmin
183*0ac9a9daSXin Li #undef fpush
184*0ac9a9daSXin Li #undef fpop
185*0ac9a9daSXin Li #undef fswap
186*0ac9a9daSXin Li #undef fvswap
187*0ac9a9daSXin Li #undef FALLBACK_QSORT_SMALL_THRESH
188*0ac9a9daSXin Li #undef FALLBACK_QSORT_STACK_SIZE
189*0ac9a9daSXin Li
190*0ac9a9daSXin Li
191*0ac9a9daSXin Li /*---------------------------------------------*/
192*0ac9a9daSXin Li /* Pre:
193*0ac9a9daSXin Li nblock > 0
194*0ac9a9daSXin Li eclass exists for [0 .. nblock-1]
195*0ac9a9daSXin Li ((UChar*)eclass) [0 .. nblock-1] holds block
196*0ac9a9daSXin Li ptr exists for [0 .. nblock-1]
197*0ac9a9daSXin Li
198*0ac9a9daSXin Li Post:
199*0ac9a9daSXin Li ((UChar*)eclass) [0 .. nblock-1] holds block
200*0ac9a9daSXin Li All other areas of eclass destroyed
201*0ac9a9daSXin Li fmap [0 .. nblock-1] holds sorted order
202*0ac9a9daSXin Li bhtab [ 0 .. 2+(nblock/32) ] destroyed
203*0ac9a9daSXin Li */
204*0ac9a9daSXin Li
205*0ac9a9daSXin Li #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
206*0ac9a9daSXin Li #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
207*0ac9a9daSXin Li #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
208*0ac9a9daSXin Li #define WORD_BH(zz) bhtab[(zz) >> 5]
209*0ac9a9daSXin Li #define UNALIGNED_BH(zz) ((zz) & 0x01f)
210*0ac9a9daSXin Li
211*0ac9a9daSXin Li static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)212*0ac9a9daSXin Li void fallbackSort ( UInt32* fmap,
213*0ac9a9daSXin Li UInt32* eclass,
214*0ac9a9daSXin Li UInt32* bhtab,
215*0ac9a9daSXin Li Int32 nblock,
216*0ac9a9daSXin Li Int32 verb )
217*0ac9a9daSXin Li {
218*0ac9a9daSXin Li Int32 ftab[257];
219*0ac9a9daSXin Li Int32 ftabCopy[256];
220*0ac9a9daSXin Li Int32 H, i, j, k, l, r, cc, cc1;
221*0ac9a9daSXin Li Int32 nNotDone;
222*0ac9a9daSXin Li Int32 nBhtab;
223*0ac9a9daSXin Li UChar* eclass8 = (UChar*)eclass;
224*0ac9a9daSXin Li
225*0ac9a9daSXin Li /*--
226*0ac9a9daSXin Li Initial 1-char radix sort to generate
227*0ac9a9daSXin Li initial fmap and initial BH bits.
228*0ac9a9daSXin Li --*/
229*0ac9a9daSXin Li if (verb >= 4)
230*0ac9a9daSXin Li VPrintf0 ( " bucket sorting ...\n" );
231*0ac9a9daSXin Li for (i = 0; i < 257; i++) ftab[i] = 0;
232*0ac9a9daSXin Li for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233*0ac9a9daSXin Li for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234*0ac9a9daSXin Li for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235*0ac9a9daSXin Li
236*0ac9a9daSXin Li for (i = 0; i < nblock; i++) {
237*0ac9a9daSXin Li j = eclass8[i];
238*0ac9a9daSXin Li k = ftab[j] - 1;
239*0ac9a9daSXin Li ftab[j] = k;
240*0ac9a9daSXin Li fmap[k] = i;
241*0ac9a9daSXin Li }
242*0ac9a9daSXin Li
243*0ac9a9daSXin Li nBhtab = 2 + (nblock / 32);
244*0ac9a9daSXin Li for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245*0ac9a9daSXin Li for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246*0ac9a9daSXin Li
247*0ac9a9daSXin Li /*--
248*0ac9a9daSXin Li Inductively refine the buckets. Kind-of an
249*0ac9a9daSXin Li "exponential radix sort" (!), inspired by the
250*0ac9a9daSXin Li Manber-Myers suffix array construction algorithm.
251*0ac9a9daSXin Li --*/
252*0ac9a9daSXin Li
253*0ac9a9daSXin Li /*-- set sentinel bits for block-end detection --*/
254*0ac9a9daSXin Li for (i = 0; i < 32; i++) {
255*0ac9a9daSXin Li SET_BH(nblock + 2*i);
256*0ac9a9daSXin Li CLEAR_BH(nblock + 2*i + 1);
257*0ac9a9daSXin Li }
258*0ac9a9daSXin Li
259*0ac9a9daSXin Li /*-- the log(N) loop --*/
260*0ac9a9daSXin Li H = 1;
261*0ac9a9daSXin Li while (1) {
262*0ac9a9daSXin Li
263*0ac9a9daSXin Li if (verb >= 4)
264*0ac9a9daSXin Li VPrintf1 ( " depth %6d has ", H );
265*0ac9a9daSXin Li
266*0ac9a9daSXin Li j = 0;
267*0ac9a9daSXin Li for (i = 0; i < nblock; i++) {
268*0ac9a9daSXin Li if (ISSET_BH(i)) j = i;
269*0ac9a9daSXin Li k = fmap[i] - H; if (k < 0) k += nblock;
270*0ac9a9daSXin Li eclass[k] = j;
271*0ac9a9daSXin Li }
272*0ac9a9daSXin Li
273*0ac9a9daSXin Li nNotDone = 0;
274*0ac9a9daSXin Li r = -1;
275*0ac9a9daSXin Li while (1) {
276*0ac9a9daSXin Li
277*0ac9a9daSXin Li /*-- find the next non-singleton bucket --*/
278*0ac9a9daSXin Li k = r + 1;
279*0ac9a9daSXin Li while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280*0ac9a9daSXin Li if (ISSET_BH(k)) {
281*0ac9a9daSXin Li while (WORD_BH(k) == 0xffffffff) k += 32;
282*0ac9a9daSXin Li while (ISSET_BH(k)) k++;
283*0ac9a9daSXin Li }
284*0ac9a9daSXin Li l = k - 1;
285*0ac9a9daSXin Li if (l >= nblock) break;
286*0ac9a9daSXin Li while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287*0ac9a9daSXin Li if (!ISSET_BH(k)) {
288*0ac9a9daSXin Li while (WORD_BH(k) == 0x00000000) k += 32;
289*0ac9a9daSXin Li while (!ISSET_BH(k)) k++;
290*0ac9a9daSXin Li }
291*0ac9a9daSXin Li r = k - 1;
292*0ac9a9daSXin Li if (r >= nblock) break;
293*0ac9a9daSXin Li
294*0ac9a9daSXin Li /*-- now [l, r] bracket current bucket --*/
295*0ac9a9daSXin Li if (r > l) {
296*0ac9a9daSXin Li nNotDone += (r - l + 1);
297*0ac9a9daSXin Li fallbackQSort3 ( fmap, eclass, l, r );
298*0ac9a9daSXin Li
299*0ac9a9daSXin Li /*-- scan bucket and generate header bits-- */
300*0ac9a9daSXin Li cc = -1;
301*0ac9a9daSXin Li for (i = l; i <= r; i++) {
302*0ac9a9daSXin Li cc1 = eclass[fmap[i]];
303*0ac9a9daSXin Li if (cc != cc1) { SET_BH(i); cc = cc1; };
304*0ac9a9daSXin Li }
305*0ac9a9daSXin Li }
306*0ac9a9daSXin Li }
307*0ac9a9daSXin Li
308*0ac9a9daSXin Li if (verb >= 4)
309*0ac9a9daSXin Li VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310*0ac9a9daSXin Li
311*0ac9a9daSXin Li H *= 2;
312*0ac9a9daSXin Li if (H > nblock || nNotDone == 0) break;
313*0ac9a9daSXin Li }
314*0ac9a9daSXin Li
315*0ac9a9daSXin Li /*--
316*0ac9a9daSXin Li Reconstruct the original block in
317*0ac9a9daSXin Li eclass8 [0 .. nblock-1], since the
318*0ac9a9daSXin Li previous phase destroyed it.
319*0ac9a9daSXin Li --*/
320*0ac9a9daSXin Li if (verb >= 4)
321*0ac9a9daSXin Li VPrintf0 ( " reconstructing block ...\n" );
322*0ac9a9daSXin Li j = 0;
323*0ac9a9daSXin Li for (i = 0; i < nblock; i++) {
324*0ac9a9daSXin Li while (ftabCopy[j] == 0) j++;
325*0ac9a9daSXin Li ftabCopy[j]--;
326*0ac9a9daSXin Li eclass8[fmap[i]] = (UChar)j;
327*0ac9a9daSXin Li }
328*0ac9a9daSXin Li AssertH ( j < 256, 1005 );
329*0ac9a9daSXin Li }
330*0ac9a9daSXin Li
331*0ac9a9daSXin Li #undef SET_BH
332*0ac9a9daSXin Li #undef CLEAR_BH
333*0ac9a9daSXin Li #undef ISSET_BH
334*0ac9a9daSXin Li #undef WORD_BH
335*0ac9a9daSXin Li #undef UNALIGNED_BH
336*0ac9a9daSXin Li
337*0ac9a9daSXin Li
338*0ac9a9daSXin Li /*---------------------------------------------*/
339*0ac9a9daSXin Li /*--- The main, O(N^2 log(N)) sorting ---*/
340*0ac9a9daSXin Li /*--- algorithm. Faster for "normal" ---*/
341*0ac9a9daSXin Li /*--- non-repetitive blocks. ---*/
342*0ac9a9daSXin Li /*---------------------------------------------*/
343*0ac9a9daSXin Li
344*0ac9a9daSXin Li /*---------------------------------------------*/
345*0ac9a9daSXin Li static
346*0ac9a9daSXin Li __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)347*0ac9a9daSXin Li Bool mainGtU ( UInt32 i1,
348*0ac9a9daSXin Li UInt32 i2,
349*0ac9a9daSXin Li UChar* block,
350*0ac9a9daSXin Li UInt16* quadrant,
351*0ac9a9daSXin Li UInt32 nblock,
352*0ac9a9daSXin Li Int32* budget )
353*0ac9a9daSXin Li {
354*0ac9a9daSXin Li Int32 k;
355*0ac9a9daSXin Li UChar c1, c2;
356*0ac9a9daSXin Li UInt16 s1, s2;
357*0ac9a9daSXin Li
358*0ac9a9daSXin Li AssertD ( i1 != i2, "mainGtU" );
359*0ac9a9daSXin Li /* 1 */
360*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
361*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
362*0ac9a9daSXin Li i1++; i2++;
363*0ac9a9daSXin Li /* 2 */
364*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
365*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
366*0ac9a9daSXin Li i1++; i2++;
367*0ac9a9daSXin Li /* 3 */
368*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
369*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
370*0ac9a9daSXin Li i1++; i2++;
371*0ac9a9daSXin Li /* 4 */
372*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
373*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
374*0ac9a9daSXin Li i1++; i2++;
375*0ac9a9daSXin Li /* 5 */
376*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
377*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
378*0ac9a9daSXin Li i1++; i2++;
379*0ac9a9daSXin Li /* 6 */
380*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
381*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
382*0ac9a9daSXin Li i1++; i2++;
383*0ac9a9daSXin Li /* 7 */
384*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
385*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
386*0ac9a9daSXin Li i1++; i2++;
387*0ac9a9daSXin Li /* 8 */
388*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
389*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
390*0ac9a9daSXin Li i1++; i2++;
391*0ac9a9daSXin Li /* 9 */
392*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
393*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
394*0ac9a9daSXin Li i1++; i2++;
395*0ac9a9daSXin Li /* 10 */
396*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
397*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
398*0ac9a9daSXin Li i1++; i2++;
399*0ac9a9daSXin Li /* 11 */
400*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
401*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
402*0ac9a9daSXin Li i1++; i2++;
403*0ac9a9daSXin Li /* 12 */
404*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
405*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
406*0ac9a9daSXin Li i1++; i2++;
407*0ac9a9daSXin Li
408*0ac9a9daSXin Li k = nblock + 8;
409*0ac9a9daSXin Li
410*0ac9a9daSXin Li do {
411*0ac9a9daSXin Li /* 1 */
412*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
413*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
414*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
415*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
416*0ac9a9daSXin Li i1++; i2++;
417*0ac9a9daSXin Li /* 2 */
418*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
419*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
420*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
421*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
422*0ac9a9daSXin Li i1++; i2++;
423*0ac9a9daSXin Li /* 3 */
424*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
425*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
426*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
427*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
428*0ac9a9daSXin Li i1++; i2++;
429*0ac9a9daSXin Li /* 4 */
430*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
431*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
432*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
433*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
434*0ac9a9daSXin Li i1++; i2++;
435*0ac9a9daSXin Li /* 5 */
436*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
437*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
438*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
439*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
440*0ac9a9daSXin Li i1++; i2++;
441*0ac9a9daSXin Li /* 6 */
442*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
443*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
444*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
445*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
446*0ac9a9daSXin Li i1++; i2++;
447*0ac9a9daSXin Li /* 7 */
448*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
449*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
450*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
451*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
452*0ac9a9daSXin Li i1++; i2++;
453*0ac9a9daSXin Li /* 8 */
454*0ac9a9daSXin Li c1 = block[i1]; c2 = block[i2];
455*0ac9a9daSXin Li if (c1 != c2) return (c1 > c2);
456*0ac9a9daSXin Li s1 = quadrant[i1]; s2 = quadrant[i2];
457*0ac9a9daSXin Li if (s1 != s2) return (s1 > s2);
458*0ac9a9daSXin Li i1++; i2++;
459*0ac9a9daSXin Li
460*0ac9a9daSXin Li if (i1 >= nblock) i1 -= nblock;
461*0ac9a9daSXin Li if (i2 >= nblock) i2 -= nblock;
462*0ac9a9daSXin Li
463*0ac9a9daSXin Li k -= 8;
464*0ac9a9daSXin Li (*budget)--;
465*0ac9a9daSXin Li }
466*0ac9a9daSXin Li while (k >= 0);
467*0ac9a9daSXin Li
468*0ac9a9daSXin Li return False;
469*0ac9a9daSXin Li }
470*0ac9a9daSXin Li
471*0ac9a9daSXin Li
472*0ac9a9daSXin Li /*---------------------------------------------*/
473*0ac9a9daSXin Li /*--
474*0ac9a9daSXin Li Knuth's increments seem to work better
475*0ac9a9daSXin Li than Incerpi-Sedgewick here. Possibly
476*0ac9a9daSXin Li because the number of elems to sort is
477*0ac9a9daSXin Li usually small, typically <= 20.
478*0ac9a9daSXin Li --*/
479*0ac9a9daSXin Li static
480*0ac9a9daSXin Li Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481*0ac9a9daSXin Li 9841, 29524, 88573, 265720,
482*0ac9a9daSXin Li 797161, 2391484 };
483*0ac9a9daSXin Li
484*0ac9a9daSXin Li static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)485*0ac9a9daSXin Li void mainSimpleSort ( UInt32* ptr,
486*0ac9a9daSXin Li UChar* block,
487*0ac9a9daSXin Li UInt16* quadrant,
488*0ac9a9daSXin Li Int32 nblock,
489*0ac9a9daSXin Li Int32 lo,
490*0ac9a9daSXin Li Int32 hi,
491*0ac9a9daSXin Li Int32 d,
492*0ac9a9daSXin Li Int32* budget )
493*0ac9a9daSXin Li {
494*0ac9a9daSXin Li Int32 i, j, h, bigN, hp;
495*0ac9a9daSXin Li UInt32 v;
496*0ac9a9daSXin Li
497*0ac9a9daSXin Li bigN = hi - lo + 1;
498*0ac9a9daSXin Li if (bigN < 2) return;
499*0ac9a9daSXin Li
500*0ac9a9daSXin Li hp = 0;
501*0ac9a9daSXin Li while (incs[hp] < bigN) hp++;
502*0ac9a9daSXin Li hp--;
503*0ac9a9daSXin Li
504*0ac9a9daSXin Li for (; hp >= 0; hp--) {
505*0ac9a9daSXin Li h = incs[hp];
506*0ac9a9daSXin Li
507*0ac9a9daSXin Li i = lo + h;
508*0ac9a9daSXin Li while (True) {
509*0ac9a9daSXin Li
510*0ac9a9daSXin Li /*-- copy 1 --*/
511*0ac9a9daSXin Li if (i > hi) break;
512*0ac9a9daSXin Li v = ptr[i];
513*0ac9a9daSXin Li j = i;
514*0ac9a9daSXin Li while ( mainGtU (
515*0ac9a9daSXin Li ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516*0ac9a9daSXin Li ) ) {
517*0ac9a9daSXin Li ptr[j] = ptr[j-h];
518*0ac9a9daSXin Li j = j - h;
519*0ac9a9daSXin Li if (j <= (lo + h - 1)) break;
520*0ac9a9daSXin Li }
521*0ac9a9daSXin Li ptr[j] = v;
522*0ac9a9daSXin Li i++;
523*0ac9a9daSXin Li
524*0ac9a9daSXin Li /*-- copy 2 --*/
525*0ac9a9daSXin Li if (i > hi) break;
526*0ac9a9daSXin Li v = ptr[i];
527*0ac9a9daSXin Li j = i;
528*0ac9a9daSXin Li while ( mainGtU (
529*0ac9a9daSXin Li ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530*0ac9a9daSXin Li ) ) {
531*0ac9a9daSXin Li ptr[j] = ptr[j-h];
532*0ac9a9daSXin Li j = j - h;
533*0ac9a9daSXin Li if (j <= (lo + h - 1)) break;
534*0ac9a9daSXin Li }
535*0ac9a9daSXin Li ptr[j] = v;
536*0ac9a9daSXin Li i++;
537*0ac9a9daSXin Li
538*0ac9a9daSXin Li /*-- copy 3 --*/
539*0ac9a9daSXin Li if (i > hi) break;
540*0ac9a9daSXin Li v = ptr[i];
541*0ac9a9daSXin Li j = i;
542*0ac9a9daSXin Li while ( mainGtU (
543*0ac9a9daSXin Li ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544*0ac9a9daSXin Li ) ) {
545*0ac9a9daSXin Li ptr[j] = ptr[j-h];
546*0ac9a9daSXin Li j = j - h;
547*0ac9a9daSXin Li if (j <= (lo + h - 1)) break;
548*0ac9a9daSXin Li }
549*0ac9a9daSXin Li ptr[j] = v;
550*0ac9a9daSXin Li i++;
551*0ac9a9daSXin Li
552*0ac9a9daSXin Li if (*budget < 0) return;
553*0ac9a9daSXin Li }
554*0ac9a9daSXin Li }
555*0ac9a9daSXin Li }
556*0ac9a9daSXin Li
557*0ac9a9daSXin Li
558*0ac9a9daSXin Li /*---------------------------------------------*/
559*0ac9a9daSXin Li /*--
560*0ac9a9daSXin Li The following is an implementation of
561*0ac9a9daSXin Li an elegant 3-way quicksort for strings,
562*0ac9a9daSXin Li described in a paper "Fast Algorithms for
563*0ac9a9daSXin Li Sorting and Searching Strings", by Robert
564*0ac9a9daSXin Li Sedgewick and Jon L. Bentley.
565*0ac9a9daSXin Li --*/
566*0ac9a9daSXin Li
567*0ac9a9daSXin Li #define mswap(zz1, zz2) \
568*0ac9a9daSXin Li { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569*0ac9a9daSXin Li
570*0ac9a9daSXin Li #define mvswap(zzp1, zzp2, zzn) \
571*0ac9a9daSXin Li { \
572*0ac9a9daSXin Li Int32 yyp1 = (zzp1); \
573*0ac9a9daSXin Li Int32 yyp2 = (zzp2); \
574*0ac9a9daSXin Li Int32 yyn = (zzn); \
575*0ac9a9daSXin Li while (yyn > 0) { \
576*0ac9a9daSXin Li mswap(ptr[yyp1], ptr[yyp2]); \
577*0ac9a9daSXin Li yyp1++; yyp2++; yyn--; \
578*0ac9a9daSXin Li } \
579*0ac9a9daSXin Li }
580*0ac9a9daSXin Li
581*0ac9a9daSXin Li static
582*0ac9a9daSXin Li __inline__
mmed3(UChar a,UChar b,UChar c)583*0ac9a9daSXin Li UChar mmed3 ( UChar a, UChar b, UChar c )
584*0ac9a9daSXin Li {
585*0ac9a9daSXin Li UChar t;
586*0ac9a9daSXin Li if (a > b) { t = a; a = b; b = t; };
587*0ac9a9daSXin Li if (b > c) {
588*0ac9a9daSXin Li b = c;
589*0ac9a9daSXin Li if (a > b) b = a;
590*0ac9a9daSXin Li }
591*0ac9a9daSXin Li return b;
592*0ac9a9daSXin Li }
593*0ac9a9daSXin Li
594*0ac9a9daSXin Li #define mmin(a,b) ((a) < (b)) ? (a) : (b)
595*0ac9a9daSXin Li
596*0ac9a9daSXin Li #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597*0ac9a9daSXin Li stackHi[sp] = hz; \
598*0ac9a9daSXin Li stackD [sp] = dz; \
599*0ac9a9daSXin Li sp++; }
600*0ac9a9daSXin Li
601*0ac9a9daSXin Li #define mpop(lz,hz,dz) { sp--; \
602*0ac9a9daSXin Li lz = stackLo[sp]; \
603*0ac9a9daSXin Li hz = stackHi[sp]; \
604*0ac9a9daSXin Li dz = stackD [sp]; }
605*0ac9a9daSXin Li
606*0ac9a9daSXin Li
607*0ac9a9daSXin Li #define mnextsize(az) (nextHi[az]-nextLo[az])
608*0ac9a9daSXin Li
609*0ac9a9daSXin Li #define mnextswap(az,bz) \
610*0ac9a9daSXin Li { Int32 tz; \
611*0ac9a9daSXin Li tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612*0ac9a9daSXin Li tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613*0ac9a9daSXin Li tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614*0ac9a9daSXin Li
615*0ac9a9daSXin Li
616*0ac9a9daSXin Li #define MAIN_QSORT_SMALL_THRESH 20
617*0ac9a9daSXin Li #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618*0ac9a9daSXin Li #define MAIN_QSORT_STACK_SIZE 100
619*0ac9a9daSXin Li
620*0ac9a9daSXin Li static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)621*0ac9a9daSXin Li void mainQSort3 ( UInt32* ptr,
622*0ac9a9daSXin Li UChar* block,
623*0ac9a9daSXin Li UInt16* quadrant,
624*0ac9a9daSXin Li Int32 nblock,
625*0ac9a9daSXin Li Int32 loSt,
626*0ac9a9daSXin Li Int32 hiSt,
627*0ac9a9daSXin Li Int32 dSt,
628*0ac9a9daSXin Li Int32* budget )
629*0ac9a9daSXin Li {
630*0ac9a9daSXin Li Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631*0ac9a9daSXin Li Int32 sp, lo, hi, d;
632*0ac9a9daSXin Li
633*0ac9a9daSXin Li Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634*0ac9a9daSXin Li Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635*0ac9a9daSXin Li Int32 stackD [MAIN_QSORT_STACK_SIZE];
636*0ac9a9daSXin Li
637*0ac9a9daSXin Li Int32 nextLo[3];
638*0ac9a9daSXin Li Int32 nextHi[3];
639*0ac9a9daSXin Li Int32 nextD [3];
640*0ac9a9daSXin Li
641*0ac9a9daSXin Li sp = 0;
642*0ac9a9daSXin Li mpush ( loSt, hiSt, dSt );
643*0ac9a9daSXin Li
644*0ac9a9daSXin Li while (sp > 0) {
645*0ac9a9daSXin Li
646*0ac9a9daSXin Li AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647*0ac9a9daSXin Li
648*0ac9a9daSXin Li mpop ( lo, hi, d );
649*0ac9a9daSXin Li if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650*0ac9a9daSXin Li d > MAIN_QSORT_DEPTH_THRESH) {
651*0ac9a9daSXin Li mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652*0ac9a9daSXin Li if (*budget < 0) return;
653*0ac9a9daSXin Li continue;
654*0ac9a9daSXin Li }
655*0ac9a9daSXin Li
656*0ac9a9daSXin Li med = (Int32)
657*0ac9a9daSXin Li mmed3 ( block[ptr[ lo ]+d],
658*0ac9a9daSXin Li block[ptr[ hi ]+d],
659*0ac9a9daSXin Li block[ptr[ (lo+hi)>>1 ]+d] );
660*0ac9a9daSXin Li
661*0ac9a9daSXin Li unLo = ltLo = lo;
662*0ac9a9daSXin Li unHi = gtHi = hi;
663*0ac9a9daSXin Li
664*0ac9a9daSXin Li while (True) {
665*0ac9a9daSXin Li while (True) {
666*0ac9a9daSXin Li if (unLo > unHi) break;
667*0ac9a9daSXin Li n = ((Int32)block[ptr[unLo]+d]) - med;
668*0ac9a9daSXin Li if (n == 0) {
669*0ac9a9daSXin Li mswap(ptr[unLo], ptr[ltLo]);
670*0ac9a9daSXin Li ltLo++; unLo++; continue;
671*0ac9a9daSXin Li };
672*0ac9a9daSXin Li if (n > 0) break;
673*0ac9a9daSXin Li unLo++;
674*0ac9a9daSXin Li }
675*0ac9a9daSXin Li while (True) {
676*0ac9a9daSXin Li if (unLo > unHi) break;
677*0ac9a9daSXin Li n = ((Int32)block[ptr[unHi]+d]) - med;
678*0ac9a9daSXin Li if (n == 0) {
679*0ac9a9daSXin Li mswap(ptr[unHi], ptr[gtHi]);
680*0ac9a9daSXin Li gtHi--; unHi--; continue;
681*0ac9a9daSXin Li };
682*0ac9a9daSXin Li if (n < 0) break;
683*0ac9a9daSXin Li unHi--;
684*0ac9a9daSXin Li }
685*0ac9a9daSXin Li if (unLo > unHi) break;
686*0ac9a9daSXin Li mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687*0ac9a9daSXin Li }
688*0ac9a9daSXin Li
689*0ac9a9daSXin Li AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690*0ac9a9daSXin Li
691*0ac9a9daSXin Li if (gtHi < ltLo) {
692*0ac9a9daSXin Li mpush(lo, hi, d+1 );
693*0ac9a9daSXin Li continue;
694*0ac9a9daSXin Li }
695*0ac9a9daSXin Li
696*0ac9a9daSXin Li n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697*0ac9a9daSXin Li m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698*0ac9a9daSXin Li
699*0ac9a9daSXin Li n = lo + unLo - ltLo - 1;
700*0ac9a9daSXin Li m = hi - (gtHi - unHi) + 1;
701*0ac9a9daSXin Li
702*0ac9a9daSXin Li nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703*0ac9a9daSXin Li nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704*0ac9a9daSXin Li nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705*0ac9a9daSXin Li
706*0ac9a9daSXin Li if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707*0ac9a9daSXin Li if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708*0ac9a9daSXin Li if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709*0ac9a9daSXin Li
710*0ac9a9daSXin Li AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711*0ac9a9daSXin Li AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712*0ac9a9daSXin Li
713*0ac9a9daSXin Li mpush (nextLo[0], nextHi[0], nextD[0]);
714*0ac9a9daSXin Li mpush (nextLo[1], nextHi[1], nextD[1]);
715*0ac9a9daSXin Li mpush (nextLo[2], nextHi[2], nextD[2]);
716*0ac9a9daSXin Li }
717*0ac9a9daSXin Li }
718*0ac9a9daSXin Li
719*0ac9a9daSXin Li #undef mswap
720*0ac9a9daSXin Li #undef mvswap
721*0ac9a9daSXin Li #undef mpush
722*0ac9a9daSXin Li #undef mpop
723*0ac9a9daSXin Li #undef mmin
724*0ac9a9daSXin Li #undef mnextsize
725*0ac9a9daSXin Li #undef mnextswap
726*0ac9a9daSXin Li #undef MAIN_QSORT_SMALL_THRESH
727*0ac9a9daSXin Li #undef MAIN_QSORT_DEPTH_THRESH
728*0ac9a9daSXin Li #undef MAIN_QSORT_STACK_SIZE
729*0ac9a9daSXin Li
730*0ac9a9daSXin Li
731*0ac9a9daSXin Li /*---------------------------------------------*/
732*0ac9a9daSXin Li /* Pre:
733*0ac9a9daSXin Li nblock > N_OVERSHOOT
734*0ac9a9daSXin Li block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735*0ac9a9daSXin Li ((UChar*)block32) [0 .. nblock-1] holds block
736*0ac9a9daSXin Li ptr exists for [0 .. nblock-1]
737*0ac9a9daSXin Li
738*0ac9a9daSXin Li Post:
739*0ac9a9daSXin Li ((UChar*)block32) [0 .. nblock-1] holds block
740*0ac9a9daSXin Li All other areas of block32 destroyed
741*0ac9a9daSXin Li ftab [0 .. 65536 ] destroyed
742*0ac9a9daSXin Li ptr [0 .. nblock-1] holds sorted order
743*0ac9a9daSXin Li if (*budget < 0), sorting was abandoned
744*0ac9a9daSXin Li */
745*0ac9a9daSXin Li
746*0ac9a9daSXin Li #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747*0ac9a9daSXin Li #define SETMASK (1 << 21)
748*0ac9a9daSXin Li #define CLEARMASK (~(SETMASK))
749*0ac9a9daSXin Li
750*0ac9a9daSXin Li static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)751*0ac9a9daSXin Li void mainSort ( UInt32* ptr,
752*0ac9a9daSXin Li UChar* block,
753*0ac9a9daSXin Li UInt16* quadrant,
754*0ac9a9daSXin Li UInt32* ftab,
755*0ac9a9daSXin Li Int32 nblock,
756*0ac9a9daSXin Li Int32 verb,
757*0ac9a9daSXin Li Int32* budget )
758*0ac9a9daSXin Li {
759*0ac9a9daSXin Li Int32 i, j, k, ss, sb;
760*0ac9a9daSXin Li Int32 runningOrder[256];
761*0ac9a9daSXin Li Bool bigDone[256];
762*0ac9a9daSXin Li Int32 copyStart[256];
763*0ac9a9daSXin Li Int32 copyEnd [256];
764*0ac9a9daSXin Li UChar c1;
765*0ac9a9daSXin Li Int32 numQSorted;
766*0ac9a9daSXin Li UInt16 s;
767*0ac9a9daSXin Li if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768*0ac9a9daSXin Li
769*0ac9a9daSXin Li /*-- set up the 2-byte frequency table --*/
770*0ac9a9daSXin Li for (i = 65536; i >= 0; i--) ftab[i] = 0;
771*0ac9a9daSXin Li
772*0ac9a9daSXin Li j = block[0] << 8;
773*0ac9a9daSXin Li i = nblock-1;
774*0ac9a9daSXin Li for (; i >= 3; i -= 4) {
775*0ac9a9daSXin Li quadrant[i] = 0;
776*0ac9a9daSXin Li j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777*0ac9a9daSXin Li ftab[j]++;
778*0ac9a9daSXin Li quadrant[i-1] = 0;
779*0ac9a9daSXin Li j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780*0ac9a9daSXin Li ftab[j]++;
781*0ac9a9daSXin Li quadrant[i-2] = 0;
782*0ac9a9daSXin Li j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783*0ac9a9daSXin Li ftab[j]++;
784*0ac9a9daSXin Li quadrant[i-3] = 0;
785*0ac9a9daSXin Li j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786*0ac9a9daSXin Li ftab[j]++;
787*0ac9a9daSXin Li }
788*0ac9a9daSXin Li for (; i >= 0; i--) {
789*0ac9a9daSXin Li quadrant[i] = 0;
790*0ac9a9daSXin Li j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791*0ac9a9daSXin Li ftab[j]++;
792*0ac9a9daSXin Li }
793*0ac9a9daSXin Li
794*0ac9a9daSXin Li /*-- (emphasises close relationship of block & quadrant) --*/
795*0ac9a9daSXin Li for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796*0ac9a9daSXin Li block [nblock+i] = block[i];
797*0ac9a9daSXin Li quadrant[nblock+i] = 0;
798*0ac9a9daSXin Li }
799*0ac9a9daSXin Li
800*0ac9a9daSXin Li if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801*0ac9a9daSXin Li
802*0ac9a9daSXin Li /*-- Complete the initial radix sort --*/
803*0ac9a9daSXin Li for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804*0ac9a9daSXin Li
805*0ac9a9daSXin Li s = block[0] << 8;
806*0ac9a9daSXin Li i = nblock-1;
807*0ac9a9daSXin Li for (; i >= 3; i -= 4) {
808*0ac9a9daSXin Li s = (s >> 8) | (block[i] << 8);
809*0ac9a9daSXin Li j = ftab[s] -1;
810*0ac9a9daSXin Li ftab[s] = j;
811*0ac9a9daSXin Li ptr[j] = i;
812*0ac9a9daSXin Li s = (s >> 8) | (block[i-1] << 8);
813*0ac9a9daSXin Li j = ftab[s] -1;
814*0ac9a9daSXin Li ftab[s] = j;
815*0ac9a9daSXin Li ptr[j] = i-1;
816*0ac9a9daSXin Li s = (s >> 8) | (block[i-2] << 8);
817*0ac9a9daSXin Li j = ftab[s] -1;
818*0ac9a9daSXin Li ftab[s] = j;
819*0ac9a9daSXin Li ptr[j] = i-2;
820*0ac9a9daSXin Li s = (s >> 8) | (block[i-3] << 8);
821*0ac9a9daSXin Li j = ftab[s] -1;
822*0ac9a9daSXin Li ftab[s] = j;
823*0ac9a9daSXin Li ptr[j] = i-3;
824*0ac9a9daSXin Li }
825*0ac9a9daSXin Li for (; i >= 0; i--) {
826*0ac9a9daSXin Li s = (s >> 8) | (block[i] << 8);
827*0ac9a9daSXin Li j = ftab[s] -1;
828*0ac9a9daSXin Li ftab[s] = j;
829*0ac9a9daSXin Li ptr[j] = i;
830*0ac9a9daSXin Li }
831*0ac9a9daSXin Li
832*0ac9a9daSXin Li /*--
833*0ac9a9daSXin Li Now ftab contains the first loc of every small bucket.
834*0ac9a9daSXin Li Calculate the running order, from smallest to largest
835*0ac9a9daSXin Li big bucket.
836*0ac9a9daSXin Li --*/
837*0ac9a9daSXin Li for (i = 0; i <= 255; i++) {
838*0ac9a9daSXin Li bigDone [i] = False;
839*0ac9a9daSXin Li runningOrder[i] = i;
840*0ac9a9daSXin Li }
841*0ac9a9daSXin Li
842*0ac9a9daSXin Li {
843*0ac9a9daSXin Li Int32 vv;
844*0ac9a9daSXin Li Int32 h = 1;
845*0ac9a9daSXin Li do h = 3 * h + 1; while (h <= 256);
846*0ac9a9daSXin Li do {
847*0ac9a9daSXin Li h = h / 3;
848*0ac9a9daSXin Li for (i = h; i <= 255; i++) {
849*0ac9a9daSXin Li vv = runningOrder[i];
850*0ac9a9daSXin Li j = i;
851*0ac9a9daSXin Li while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852*0ac9a9daSXin Li runningOrder[j] = runningOrder[j-h];
853*0ac9a9daSXin Li j = j - h;
854*0ac9a9daSXin Li if (j <= (h - 1)) goto zero;
855*0ac9a9daSXin Li }
856*0ac9a9daSXin Li zero:
857*0ac9a9daSXin Li runningOrder[j] = vv;
858*0ac9a9daSXin Li }
859*0ac9a9daSXin Li } while (h != 1);
860*0ac9a9daSXin Li }
861*0ac9a9daSXin Li
862*0ac9a9daSXin Li /*--
863*0ac9a9daSXin Li The main sorting loop.
864*0ac9a9daSXin Li --*/
865*0ac9a9daSXin Li
866*0ac9a9daSXin Li numQSorted = 0;
867*0ac9a9daSXin Li
868*0ac9a9daSXin Li for (i = 0; i <= 255; i++) {
869*0ac9a9daSXin Li
870*0ac9a9daSXin Li /*--
871*0ac9a9daSXin Li Process big buckets, starting with the least full.
872*0ac9a9daSXin Li Basically this is a 3-step process in which we call
873*0ac9a9daSXin Li mainQSort3 to sort the small buckets [ss, j], but
874*0ac9a9daSXin Li also make a big effort to avoid the calls if we can.
875*0ac9a9daSXin Li --*/
876*0ac9a9daSXin Li ss = runningOrder[i];
877*0ac9a9daSXin Li
878*0ac9a9daSXin Li /*--
879*0ac9a9daSXin Li Step 1:
880*0ac9a9daSXin Li Complete the big bucket [ss] by quicksorting
881*0ac9a9daSXin Li any unsorted small buckets [ss, j], for j != ss.
882*0ac9a9daSXin Li Hopefully previous pointer-scanning phases have already
883*0ac9a9daSXin Li completed many of the small buckets [ss, j], so
884*0ac9a9daSXin Li we don't have to sort them at all.
885*0ac9a9daSXin Li --*/
886*0ac9a9daSXin Li for (j = 0; j <= 255; j++) {
887*0ac9a9daSXin Li if (j != ss) {
888*0ac9a9daSXin Li sb = (ss << 8) + j;
889*0ac9a9daSXin Li if ( ! (ftab[sb] & SETMASK) ) {
890*0ac9a9daSXin Li Int32 lo = ftab[sb] & CLEARMASK;
891*0ac9a9daSXin Li Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892*0ac9a9daSXin Li if (hi > lo) {
893*0ac9a9daSXin Li if (verb >= 4)
894*0ac9a9daSXin Li VPrintf4 ( " qsort [0x%x, 0x%x] "
895*0ac9a9daSXin Li "done %d this %d\n",
896*0ac9a9daSXin Li ss, j, numQSorted, hi - lo + 1 );
897*0ac9a9daSXin Li mainQSort3 (
898*0ac9a9daSXin Li ptr, block, quadrant, nblock,
899*0ac9a9daSXin Li lo, hi, BZ_N_RADIX, budget
900*0ac9a9daSXin Li );
901*0ac9a9daSXin Li numQSorted += (hi - lo + 1);
902*0ac9a9daSXin Li if (*budget < 0) return;
903*0ac9a9daSXin Li }
904*0ac9a9daSXin Li }
905*0ac9a9daSXin Li ftab[sb] |= SETMASK;
906*0ac9a9daSXin Li }
907*0ac9a9daSXin Li }
908*0ac9a9daSXin Li
909*0ac9a9daSXin Li AssertH ( !bigDone[ss], 1006 );
910*0ac9a9daSXin Li
911*0ac9a9daSXin Li /*--
912*0ac9a9daSXin Li Step 2:
913*0ac9a9daSXin Li Now scan this big bucket [ss] so as to synthesise the
914*0ac9a9daSXin Li sorted order for small buckets [t, ss] for all t,
915*0ac9a9daSXin Li including, magically, the bucket [ss,ss] too.
916*0ac9a9daSXin Li This will avoid doing Real Work in subsequent Step 1's.
917*0ac9a9daSXin Li --*/
918*0ac9a9daSXin Li {
919*0ac9a9daSXin Li for (j = 0; j <= 255; j++) {
920*0ac9a9daSXin Li copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921*0ac9a9daSXin Li copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922*0ac9a9daSXin Li }
923*0ac9a9daSXin Li for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924*0ac9a9daSXin Li k = ptr[j]-1; if (k < 0) k += nblock;
925*0ac9a9daSXin Li c1 = block[k];
926*0ac9a9daSXin Li if (!bigDone[c1])
927*0ac9a9daSXin Li ptr[ copyStart[c1]++ ] = k;
928*0ac9a9daSXin Li }
929*0ac9a9daSXin Li for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930*0ac9a9daSXin Li k = ptr[j]-1; if (k < 0) k += nblock;
931*0ac9a9daSXin Li c1 = block[k];
932*0ac9a9daSXin Li if (!bigDone[c1])
933*0ac9a9daSXin Li ptr[ copyEnd[c1]-- ] = k;
934*0ac9a9daSXin Li }
935*0ac9a9daSXin Li }
936*0ac9a9daSXin Li
937*0ac9a9daSXin Li AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938*0ac9a9daSXin Li ||
939*0ac9a9daSXin Li /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940*0ac9a9daSXin Li Necessity for this case is demonstrated by compressing
941*0ac9a9daSXin Li a sequence of approximately 48.5 million of character
942*0ac9a9daSXin Li 251; 1.0.0/1.0.1 will then die here. */
943*0ac9a9daSXin Li (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944*0ac9a9daSXin Li 1007 )
945*0ac9a9daSXin Li
946*0ac9a9daSXin Li for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947*0ac9a9daSXin Li
948*0ac9a9daSXin Li /*--
949*0ac9a9daSXin Li Step 3:
950*0ac9a9daSXin Li The [ss] big bucket is now done. Record this fact,
951*0ac9a9daSXin Li and update the quadrant descriptors. Remember to
952*0ac9a9daSXin Li update quadrants in the overshoot area too, if
953*0ac9a9daSXin Li necessary. The "if (i < 255)" test merely skips
954*0ac9a9daSXin Li this updating for the last bucket processed, since
955*0ac9a9daSXin Li updating for the last bucket is pointless.
956*0ac9a9daSXin Li
957*0ac9a9daSXin Li The quadrant array provides a way to incrementally
958*0ac9a9daSXin Li cache sort orderings, as they appear, so as to
959*0ac9a9daSXin Li make subsequent comparisons in fullGtU() complete
960*0ac9a9daSXin Li faster. For repetitive blocks this makes a big
961*0ac9a9daSXin Li difference (but not big enough to be able to avoid
962*0ac9a9daSXin Li the fallback sorting mechanism, exponential radix sort).
963*0ac9a9daSXin Li
964*0ac9a9daSXin Li The precise meaning is: at all times:
965*0ac9a9daSXin Li
966*0ac9a9daSXin Li for 0 <= i < nblock and 0 <= j <= nblock
967*0ac9a9daSXin Li
968*0ac9a9daSXin Li if block[i] != block[j],
969*0ac9a9daSXin Li
970*0ac9a9daSXin Li then the relative values of quadrant[i] and
971*0ac9a9daSXin Li quadrant[j] are meaningless.
972*0ac9a9daSXin Li
973*0ac9a9daSXin Li else {
974*0ac9a9daSXin Li if quadrant[i] < quadrant[j]
975*0ac9a9daSXin Li then the string starting at i lexicographically
976*0ac9a9daSXin Li precedes the string starting at j
977*0ac9a9daSXin Li
978*0ac9a9daSXin Li else if quadrant[i] > quadrant[j]
979*0ac9a9daSXin Li then the string starting at j lexicographically
980*0ac9a9daSXin Li precedes the string starting at i
981*0ac9a9daSXin Li
982*0ac9a9daSXin Li else
983*0ac9a9daSXin Li the relative ordering of the strings starting
984*0ac9a9daSXin Li at i and j has not yet been determined.
985*0ac9a9daSXin Li }
986*0ac9a9daSXin Li --*/
987*0ac9a9daSXin Li bigDone[ss] = True;
988*0ac9a9daSXin Li
989*0ac9a9daSXin Li if (i < 255) {
990*0ac9a9daSXin Li Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991*0ac9a9daSXin Li Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992*0ac9a9daSXin Li Int32 shifts = 0;
993*0ac9a9daSXin Li
994*0ac9a9daSXin Li while ((bbSize >> shifts) > 65534) shifts++;
995*0ac9a9daSXin Li
996*0ac9a9daSXin Li for (j = bbSize-1; j >= 0; j--) {
997*0ac9a9daSXin Li Int32 a2update = ptr[bbStart + j];
998*0ac9a9daSXin Li UInt16 qVal = (UInt16)(j >> shifts);
999*0ac9a9daSXin Li quadrant[a2update] = qVal;
1000*0ac9a9daSXin Li if (a2update < BZ_N_OVERSHOOT)
1001*0ac9a9daSXin Li quadrant[a2update + nblock] = qVal;
1002*0ac9a9daSXin Li }
1003*0ac9a9daSXin Li AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004*0ac9a9daSXin Li }
1005*0ac9a9daSXin Li
1006*0ac9a9daSXin Li }
1007*0ac9a9daSXin Li
1008*0ac9a9daSXin Li if (verb >= 4)
1009*0ac9a9daSXin Li VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010*0ac9a9daSXin Li nblock, numQSorted, nblock - numQSorted );
1011*0ac9a9daSXin Li }
1012*0ac9a9daSXin Li
1013*0ac9a9daSXin Li #undef BIGFREQ
1014*0ac9a9daSXin Li #undef SETMASK
1015*0ac9a9daSXin Li #undef CLEARMASK
1016*0ac9a9daSXin Li
1017*0ac9a9daSXin Li
1018*0ac9a9daSXin Li /*---------------------------------------------*/
1019*0ac9a9daSXin Li /* Pre:
1020*0ac9a9daSXin Li nblock > 0
1021*0ac9a9daSXin Li arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022*0ac9a9daSXin Li ((UChar*)arr2) [0 .. nblock-1] holds block
1023*0ac9a9daSXin Li arr1 exists for [0 .. nblock-1]
1024*0ac9a9daSXin Li
1025*0ac9a9daSXin Li Post:
1026*0ac9a9daSXin Li ((UChar*)arr2) [0 .. nblock-1] holds block
1027*0ac9a9daSXin Li All other areas of block destroyed
1028*0ac9a9daSXin Li ftab [ 0 .. 65536 ] destroyed
1029*0ac9a9daSXin Li arr1 [0 .. nblock-1] holds sorted order
1030*0ac9a9daSXin Li */
BZ2_blockSort(EState * s)1031*0ac9a9daSXin Li void BZ2_blockSort ( EState* s )
1032*0ac9a9daSXin Li {
1033*0ac9a9daSXin Li UInt32* ptr = s->ptr;
1034*0ac9a9daSXin Li UChar* block = s->block;
1035*0ac9a9daSXin Li UInt32* ftab = s->ftab;
1036*0ac9a9daSXin Li Int32 nblock = s->nblock;
1037*0ac9a9daSXin Li Int32 verb = s->verbosity;
1038*0ac9a9daSXin Li Int32 wfact = s->workFactor;
1039*0ac9a9daSXin Li UInt16* quadrant;
1040*0ac9a9daSXin Li Int32 budget;
1041*0ac9a9daSXin Li Int32 budgetInit;
1042*0ac9a9daSXin Li Int32 i;
1043*0ac9a9daSXin Li
1044*0ac9a9daSXin Li if (nblock < 10000) {
1045*0ac9a9daSXin Li fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046*0ac9a9daSXin Li } else {
1047*0ac9a9daSXin Li /* Calculate the location for quadrant, remembering to get
1048*0ac9a9daSXin Li the alignment right. Assumes that &(block[0]) is at least
1049*0ac9a9daSXin Li 2-byte aligned -- this should be ok since block is really
1050*0ac9a9daSXin Li the first section of arr2.
1051*0ac9a9daSXin Li */
1052*0ac9a9daSXin Li i = nblock+BZ_N_OVERSHOOT;
1053*0ac9a9daSXin Li if (i & 1) i++;
1054*0ac9a9daSXin Li quadrant = (UInt16*)(&(block[i]));
1055*0ac9a9daSXin Li
1056*0ac9a9daSXin Li /* (wfact-1) / 3 puts the default-factor-30
1057*0ac9a9daSXin Li transition point at very roughly the same place as
1058*0ac9a9daSXin Li with v0.1 and v0.9.0.
1059*0ac9a9daSXin Li Not that it particularly matters any more, since the
1060*0ac9a9daSXin Li resulting compressed stream is now the same regardless
1061*0ac9a9daSXin Li of whether or not we use the main sort or fallback sort.
1062*0ac9a9daSXin Li */
1063*0ac9a9daSXin Li if (wfact < 1 ) wfact = 1;
1064*0ac9a9daSXin Li if (wfact > 100) wfact = 100;
1065*0ac9a9daSXin Li budgetInit = nblock * ((wfact-1) / 3);
1066*0ac9a9daSXin Li budget = budgetInit;
1067*0ac9a9daSXin Li
1068*0ac9a9daSXin Li mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069*0ac9a9daSXin Li if (verb >= 3)
1070*0ac9a9daSXin Li VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071*0ac9a9daSXin Li budgetInit - budget,
1072*0ac9a9daSXin Li nblock,
1073*0ac9a9daSXin Li (float)(budgetInit - budget) /
1074*0ac9a9daSXin Li (float)(nblock==0 ? 1 : nblock) );
1075*0ac9a9daSXin Li if (budget < 0) {
1076*0ac9a9daSXin Li if (verb >= 2)
1077*0ac9a9daSXin Li VPrintf0 ( " too repetitive; using fallback"
1078*0ac9a9daSXin Li " sorting algorithm\n" );
1079*0ac9a9daSXin Li fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080*0ac9a9daSXin Li }
1081*0ac9a9daSXin Li }
1082*0ac9a9daSXin Li
1083*0ac9a9daSXin Li s->origPtr = -1;
1084*0ac9a9daSXin Li for (i = 0; i < s->nblock; i++)
1085*0ac9a9daSXin Li if (ptr[i] == 0)
1086*0ac9a9daSXin Li { s->origPtr = i; break; };
1087*0ac9a9daSXin Li
1088*0ac9a9daSXin Li AssertH( s->origPtr != -1, 1003 );
1089*0ac9a9daSXin Li }
1090*0ac9a9daSXin Li
1091*0ac9a9daSXin Li
1092*0ac9a9daSXin Li /*-------------------------------------------------------------*/
1093*0ac9a9daSXin Li /*--- end blocksort.c ---*/
1094*0ac9a9daSXin Li /*-------------------------------------------------------------*/
1095