1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3 * erofs-utils/lib/kite_deflate.c
4 *
5 * Copyright (C) 2023, Alibaba Cloud
6 * Copyright (C) 2023, Gao Xiang <[email protected]>
7 */
8 #include "erofs/defs.h"
9 #include "erofs/print.h"
10 #include <errno.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include <stdio.h>
15
16 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
17 unsigned long sz);
18
19 #ifdef TEST
20 #define kite_dbg(x, ...) fprintf(stderr, x "\n", ##__VA_ARGS__)
21 #else
22 #define kite_dbg(x, ...)
23 #endif
24
25 #define kHistorySize32 (1U << 15)
26
27 #define kNumLenSymbols32 256
28 #define kNumLenSymbolsMax kNumLenSymbols32
29
30 #define kSymbolEndOfBlock 256
31 #define kSymbolMatch (kSymbolEndOfBlock + 1)
32 #define kNumLenSlots 29
33 #define kMainTableSize (kSymbolMatch + kNumLenSlots)
34
35 #define kFixedLenTableSize (kSymbolMatch + 31)
36 #define FixedDistTableSize 32
37
38 #define kMainTableSize (kSymbolMatch + kNumLenSlots)
39 #define kDistTableSize32 30
40
41 #define kNumLitLenCodesMin 257
42 #define kNumDistCodesMin 1
43
44 #define kNumLensCodesMin 4
45 #define kLensTableSize 19
46
47 #define kMatchMinLen 3
48 #define kMatchMaxLen32 kNumLenSymbols32 + kMatchMinLen - 1
49
50 #define kTableDirectLevels 16
51 #define kBitLensRepNumber_3_6 kTableDirectLevels
52 #define kBitLens0Number_3_10 (kBitLensRepNumber_3_6 + 1)
53 #define kBitLens0Number_11_138 (kBitLens0Number_3_10 + 1)
54
55 static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
56 static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
57 [0 ... 143] = 8, [144 ... 255] = 9,
58 [256 ... 279] = 7, [280 ... 287] = 8,
59 };
60 static u32 kstaticHuff_distCodes[kFixedLenTableSize];
61
62 const u8 kLenStart32[kNumLenSlots] =
63 {0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
64
65 const u8 kLenExtraBits32[kNumLenSlots] =
66 {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
67 5, 5, 5, 0};
68
69 /* First normalized distance for each code (0 = distance of 1) */
70 const u32 kDistStart[kDistTableSize32] =
71 {0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
72 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
73
74 /* extra bits for each distance code */
75 const u8 kDistExtraBits[kDistTableSize32] =
76 {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
77
78 const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
79 {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
80
81 const u8 kLevelExtraBits[3] = {2, 3, 7};
82
83 #define kStored 0
84 #define kFixedHuffman 1
85 #define kDynamicHuffman 2
86
87 struct kite_deflate_symbol {
88 u16 len, dist;
89 };
90
91 struct kite_deflate_table {
92 u32 mainCodes[kMainTableSize];
93 u8 litLenLevels[kMainTableSize];
94 u32 distCodes[kDistTableSize32];
95 u8 distLevels[kDistTableSize32];
96 u32 levelCodes[kLensTableSize];
97 u8 levelLens[kLensTableSize];
98
99 u8 numdistlens, numblcodes;
100 u16 numlitlens;
101 };
102
103 struct kite_deflate {
104 struct kite_deflate_table *tab;
105 const u8 *in;
106 u8 *out;
107
108 u32 inlen, outlen;
109 u32 pos_in, pos_out;
110 u32 inflightbits;
111 u8 bitpos;
112 u8 numHuffBits;
113 u32 symbols;
114
115 u32 costbits, startpos;
116 u8 encode_mode;
117 bool freq_changed, lastblock;
118
119 /* Previous match for lazy matching */
120 bool prev_valid;
121 u16 prev_longest;
122
123 u32 mainFreqs[kMainTableSize];
124 u32 distFreqs[kDistTableSize32];
125 struct kite_deflate_table tables[2];
126
127 /* don't reset the following fields */
128 struct kite_matchfinder *mf;
129 struct kite_deflate_symbol *sym;
130 u32 max_symbols;
131 bool lazy_search;
132 };
133
134 #define ZLIB_DISTANCE_TOO_FAR 4096
135
136 static u8 g_LenSlots[kNumLenSymbolsMax];
137
138 #define kNumLogBits 9 // do not change it
139 static u8 g_FastPos[1 << kNumLogBits];
140
writebits(struct kite_deflate * s,unsigned int v,u8 bits)141 static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
142 {
143 unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
144
145 s->inflightbits |= (v << s->bitpos) & (!rem - 1);
146 if (bits > rem) {
147 u8 *out = s->out + s->pos_out;
148
149 out[0] = s->inflightbits & 0xff;
150 out[1] = (s->inflightbits >> 8) & 0xff;
151 out[2] = (s->inflightbits >> 16) & 0xff;
152 out[3] = (s->inflightbits >> 24) & 0xff;
153 s->pos_out += 4;
154 DBG_BUGON(s->pos_out > s->outlen);
155 s->inflightbits = v >> rem;
156 s->bitpos = bits - rem;
157 return;
158 }
159 s->bitpos += bits;
160 }
161
flushbits(struct kite_deflate * s)162 static void flushbits(struct kite_deflate *s)
163 {
164 u8 *out = s->out + s->pos_out;
165
166 if (!s->bitpos)
167 return;
168 out[0] = s->inflightbits & 0xff;
169 if (s->bitpos >= 8) {
170 out[1] = (s->inflightbits >> 8) & 0xff;
171 if (s->bitpos >= 16) {
172 out[2] = (s->inflightbits >> 16) & 0xff;
173 if (s->bitpos >= 24)
174 out[3] = (s->inflightbits >> 24) & 0xff;
175 }
176 }
177 s->pos_out += round_up(s->bitpos, 8) >> 3;
178 DBG_BUGON(s->pos_out > s->outlen);
179 s->bitpos = 0;
180 s->inflightbits = 0;
181 }
182
183 #define kMaxLen 16
184
deflate_genhuffcodes(const u8 * lens,u32 * p,unsigned int nr_codes,const u32 * bl_count)185 static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
186 const u32 *bl_count)
187 {
188 u32 nextCodes[kMaxLen + 1]; /* next code value for each bit length */
189 unsigned int code = 0; /* running code value */
190 unsigned int bits, k;
191
192 for (bits = 1; bits <= kMaxLen; ++bits) {
193 code = (code + bl_count[bits - 1]) << 1;
194 nextCodes[bits] = code;
195 }
196
197 DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
198
199 for (k = 0; k < nr_codes; ++k)
200 p[k] = nextCodes[lens[k]]++;
201 }
202
deflate_reversebits_one(u32 code,u8 bits)203 static u32 deflate_reversebits_one(u32 code, u8 bits)
204 {
205 unsigned int x = code;
206
207 x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
208 x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
209 x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
210
211 return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
212 }
213
Huffman_ReverseBits(u32 * codes,const u8 * lens,unsigned int n)214 static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
215 {
216 while (n) {
217 u32 code = *codes;
218
219 *codes++ = deflate_reversebits_one(code, *lens++);
220 --n;
221 }
222 }
223
kite_deflate_init_once(void)224 static void kite_deflate_init_once(void)
225 {
226 static const u32 static_bl_count[kMaxLen + 1] = {
227 [7] = 279 - 256 + 1,
228 [8] = (143 + 1) + (287 - 280 + 1),
229 [9] = 255 - 144 + 1,
230 };
231 unsigned int i, c, j, k;
232
233 if (kstaticHuff_distCodes[31])
234 return;
235 deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
236 kFixedLenTableSize, static_bl_count);
237 Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
238 kFixedLenTableSize);
239
240 for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
241 kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
242
243 for (i = 0; i < kNumLenSlots; i++) {
244 c = kLenStart32[i];
245 j = 1 << kLenExtraBits32[i];
246
247 for (k = 0; k < j; k++, c++)
248 g_LenSlots[c] = (u8)i;
249 }
250
251 c = 0;
252 for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
253 k = 1 << kDistExtraBits[i];
254 for (j = 0; j < k; j++)
255 g_FastPos[c++] = i;
256 }
257 }
258
kite_deflate_scanlens(unsigned int numlens,u8 * lens,u32 * freqs)259 static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
260 {
261 int n; /* iterates over all tree elements */
262 int prevlen = -1; /* last emitted length */
263 int curlen; /* length of current code */
264 int nextlen = lens[0]; /* length of next code */
265 int count = 0; /* repeat count of the current code */
266 int max_count = 7; /* max repeat count */
267 int min_count = 4; /* min repeat count */
268
269 if (!nextlen)
270 max_count = 138, min_count = 3;
271
272 for (n = 0; n < numlens; n++) {
273 curlen = nextlen;
274 nextlen = n + 1 < numlens ? lens[n + 1] : -1;
275 ++count;
276
277 if (count < max_count && curlen == nextlen)
278 continue;
279 if (count < min_count) {
280 freqs[curlen] += count;
281 } else if (curlen != 0) {
282 if (curlen != prevlen)
283 freqs[curlen]++;
284 freqs[kBitLensRepNumber_3_6]++;
285 } else if (count <= 10) {
286 freqs[kBitLens0Number_3_10]++;
287 } else {
288 freqs[kBitLens0Number_11_138]++;
289 }
290
291 count = 0;
292 prevlen = curlen;
293 if (!nextlen)
294 max_count = 138, min_count = 3;
295 else if (curlen == nextlen)
296 max_count = 6, min_count = 3;
297 else
298 max_count = 7, min_count = 4;
299 }
300 }
301
kite_deflate_sendtree(struct kite_deflate * s,const u8 * lens,unsigned int numlens)302 static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
303 unsigned int numlens)
304 {
305 int n; /* iterates over all tree elements */
306 int prevlen = -1; /* last emitted length */
307 int curlen; /* length of current code */
308 int nextlen = lens[0]; /* length of next code */
309 int count = 0; /* repeat count of the current code */
310 int max_count = 7; /* max repeat count */
311 int min_count = 4; /* min repeat count */
312 const u8 *bl_lens = s->tab->levelLens;
313 const u32 *bl_codes = s->tab->levelCodes;
314
315 if (!nextlen)
316 max_count = 138, min_count = 3;
317
318 for (n = 0; n < numlens; n++) {
319 curlen = nextlen;
320 nextlen = n + 1 < numlens ? lens[n + 1] : -1;
321 ++count;
322
323 if (count < max_count && curlen == nextlen)
324 continue;
325 if (count < min_count) {
326 do {
327 writebits(s, bl_codes[curlen], bl_lens[curlen]);
328 } while (--count);
329 } else if (curlen) {
330 if (curlen != prevlen) {
331 writebits(s, bl_codes[curlen], bl_lens[curlen]);
332 count--;
333 }
334 writebits(s, bl_codes[kBitLensRepNumber_3_6],
335 bl_lens[kBitLensRepNumber_3_6]);
336 writebits(s, count - 3, 2);
337 } else if (count <= 10) {
338 writebits(s, bl_codes[kBitLens0Number_3_10],
339 bl_lens[kBitLens0Number_3_10]);
340 writebits(s, count - 3, 3);
341 } else {
342 writebits(s, bl_codes[kBitLens0Number_11_138],
343 bl_lens[kBitLens0Number_11_138]);
344 writebits(s, count - 11, 7);
345 }
346
347 count = 0;
348 prevlen = curlen;
349 if (!nextlen)
350 max_count = 138, min_count = 3;
351 else if (curlen == nextlen)
352 max_count = 6, min_count = 3;
353 else
354 max_count = 7, min_count = 4;
355 }
356 }
357
kite_deflate_setfixedtrees(struct kite_deflate * s)358 static void kite_deflate_setfixedtrees(struct kite_deflate *s)
359 {
360 writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
361 }
362
kite_deflate_sendtrees(struct kite_deflate * s)363 static void kite_deflate_sendtrees(struct kite_deflate *s)
364 {
365 struct kite_deflate_table *t = s->tab;
366 unsigned int i;
367
368 writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
369 writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
370 writebits(s, t->numdistlens - kNumDistCodesMin, 5);
371 writebits(s, t->numblcodes - kNumLensCodesMin, 4);
372
373 for (i = 0; i < t->numblcodes; i++)
374 writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
375
376 Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
377 kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
378 kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
379 }
380
deflateDistSlot(unsigned int pos)381 static inline unsigned int deflateDistSlot(unsigned int pos)
382 {
383 const unsigned int zz = (kNumLogBits - 1) &
384 ((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
385
386 return g_FastPos[pos >> zz] + (zz * 2);
387 }
388
kite_deflate_writeblock(struct kite_deflate * s,bool fixed)389 static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
390 {
391 int i;
392 u32 *mainCodes, *distCodes;
393 const u8 *litLenLevels, *distLevels;
394
395 if (!fixed) {
396 struct kite_deflate_table *t = s->tab;
397
398 mainCodes = t->mainCodes; distCodes = t->distCodes;
399 litLenLevels = t->litLenLevels; distLevels = t->distLevels;
400
401 Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
402 Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
403 } else {
404 mainCodes = kstaticHuff_mainCodes;
405 distCodes = kstaticHuff_distCodes;
406
407 litLenLevels = kstaticHuff_litLenLevels;
408 distLevels = NULL;
409 }
410
411 for (i = 0; i < s->symbols; ++i) {
412 struct kite_deflate_symbol *sym = &s->sym[i];
413
414 if (sym->len < kMatchMinLen) { /* literal */
415 writebits(s, mainCodes[sym->dist],
416 litLenLevels[sym->dist]);
417 } else {
418 unsigned int lenSlot, distSlot;
419 unsigned int lc = sym->len - kMatchMinLen;
420
421 lenSlot = g_LenSlots[lc];
422 writebits(s, mainCodes[kSymbolMatch + lenSlot],
423 litLenLevels[kSymbolMatch + lenSlot]);
424 writebits(s, lc - kLenStart32[lenSlot],
425 kLenExtraBits32[lenSlot]);
426
427 distSlot = deflateDistSlot(sym->dist - 1);
428 writebits(s, distCodes[distSlot],
429 fixed ? 5 : distLevels[distSlot]);
430 writebits(s, sym->dist - 1 - kDistStart[distSlot],
431 kDistExtraBits[distSlot]);
432 }
433 }
434 writebits(s, mainCodes[kSymbolEndOfBlock],
435 litLenLevels[kSymbolEndOfBlock]);
436 }
437
Huffman_GetPrice(const u32 * freqs,const u8 * lens,u32 num)438 static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
439 {
440 u32 price = 0;
441
442 while (num) {
443 price += (*lens++) * (*freqs++);
444 --num;
445 }
446 return price;
447 }
448
Huffman_GetPriceEx(const u32 * freqs,const u8 * lens,u32 num,const u8 * extraBits,u32 extraBase)449 static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
450 const u8 *extraBits, u32 extraBase)
451 {
452 return Huffman_GetPrice(freqs, lens, num) +
453 Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
454 }
455
456 /* Adapted from C/HuffEnc.c (7zip) for now */
457 #define HeapSortDown(p, k, size, temp) \
458 { for (;;) { \
459 size_t s = (k << 1); \
460 if (s > size) break; \
461 if (s < size && p[s + 1] > p[s]) s++; \
462 if (temp >= p[s]) break; \
463 p[k] = p[s]; k = s; \
464 } p[k] = temp; }
465
HeapSort(u32 * p,size_t size)466 static void HeapSort(u32 *p, size_t size)
467 {
468 if (size <= 1)
469 return;
470 p--;
471 {
472 size_t i = size / 2;
473 do
474 {
475 u32 temp = p[i];
476 size_t k = i;
477 HeapSortDown(p, k, size, temp)
478 }
479 while (--i != 0);
480 }
481 /*
482 do
483 {
484 size_t k = 1;
485 UInt32 temp = p[size];
486 p[size--] = p[1];
487 HeapSortDown(p, k, size, temp)
488 }
489 while (size > 1);
490 */
491 while (size > 3)
492 {
493 u32 temp = p[size];
494 size_t k = (p[3] > p[2]) ? 3 : 2;
495 p[size--] = p[1];
496 p[1] = p[k];
497 HeapSortDown(p, k, size, temp)
498 }
499 {
500 u32 temp = p[size];
501 p[size] = p[1];
502 if (size > 2 && p[2] < temp)
503 {
504 p[1] = p[2];
505 p[2] = temp;
506 }
507 else
508 p[1] = temp;
509 }
510 }
511
512 #define NUM_BITS 10
513 #define MASK (((unsigned)1 << NUM_BITS) - 1)
514
Huffman_Generate(const u32 * freqs,u32 * p,u8 * lens,unsigned int numSymbols,unsigned int maxLen)515 static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
516 unsigned int numSymbols, unsigned int maxLen)
517 {
518 u32 num, i;
519
520 num = 0;
521 /* if (maxLen > 10) maxLen = 10; */
522
523 for (i = 0; i < numSymbols; i++) {
524 u32 freq = freqs[i];
525
526 if (!freq)
527 lens[i] = 0;
528 else
529 p[num++] = i | (freq << NUM_BITS);
530 }
531 HeapSort(p, num);
532
533 if (num < 2) {
534 unsigned int minCode = 0, maxCode = 1;
535
536 if (num == 1) {
537 maxCode = (unsigned int)p[0] & MASK;
538 if (!maxCode)
539 maxCode++;
540 }
541 p[minCode] = 0;
542 p[maxCode] = 1;
543 lens[minCode] = lens[maxCode] = 1;
544 return;
545 }
546
547 {
548 u32 b, e, i;
549
550 i = b = e = 0;
551 do {
552 u32 n, m, freq;
553
554 n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
555 freq = (p[n] & ~MASK);
556 p[n] = (p[n] & MASK) | (e << NUM_BITS);
557 m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
558 freq += (p[m] & ~MASK);
559 p[m] = (p[m] & MASK) | (e << NUM_BITS);
560 p[e] = (p[e] & MASK) | freq;
561 e++;
562 } while (num - e > 1);
563
564 {
565 u32 lenCounters[kMaxLen + 1];
566
567 for (i = 0; i <= kMaxLen; i++)
568 lenCounters[i] = 0;
569
570 p[--e] &= MASK;
571 lenCounters[1] = 2;
572 while (e > 0) {
573 u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
574
575 p[e] = (p[e] & MASK) | (len << NUM_BITS);
576 if (len >= maxLen)
577 for (len = maxLen - 1; lenCounters[len] == 0; len--);
578 lenCounters[len]--;
579 lenCounters[(size_t)len + 1] += 2;
580 }
581
582 {
583 u32 len;
584
585 i = 0;
586 for (len = maxLen; len != 0; len--) {
587 u32 k;
588 for (k = lenCounters[len]; k != 0; k--)
589 lens[p[i++] & MASK] = (u8)len;
590 }
591 }
592 deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
593 }
594 }
595 }
596
kite_deflate_fixdynblock(struct kite_deflate * s)597 static void kite_deflate_fixdynblock(struct kite_deflate *s)
598 {
599 struct kite_deflate_table *t = s->tab;
600 unsigned int numlitlens, numdistlens, numblcodes;
601 u32 levelFreqs[kLensTableSize] = {0};
602 u32 opt_mainlen;
603
604 if (!s->freq_changed)
605 return;
606
607 /* in order to match zlib */
608 s->numHuffBits = kMaxLen;
609 // s->numHuffBits = (s->symbols > 18000 ? 12 :
610 // (s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
611
612 Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
613 kMainTableSize, s->numHuffBits);
614 Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
615 kDistTableSize32, s->numHuffBits);
616
617 /* code lengths for the literal/length alphabet */
618 numlitlens = kMainTableSize;
619 while (numlitlens > kNumLitLenCodesMin &&
620 !t->litLenLevels[numlitlens - 1])
621 --numlitlens;
622
623 /* code lengths for the distance alphabet */
624 numdistlens = kDistTableSize32;
625 while (numdistlens > kNumDistCodesMin &&
626 !t->distLevels[numdistlens - 1])
627 --numdistlens;
628
629 kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
630 kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
631 Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
632 kLensTableSize, 7);
633 numblcodes = kLensTableSize;
634 while (numblcodes > kNumLensCodesMin &&
635 !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
636 --numblcodes;
637
638 t->numlitlens = numlitlens;
639 t->numdistlens = numdistlens;
640 t->numblcodes = numblcodes;
641
642 opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
643 kMainTableSize, kLenExtraBits32, kSymbolMatch) +
644 Huffman_GetPriceEx(s->distFreqs, t->distLevels,
645 kDistTableSize32, kDistExtraBits, 0);
646 s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
647 Huffman_GetPriceEx(levelFreqs, t->levelLens,
648 kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
649 opt_mainlen;
650 s->freq_changed = false;
651 }
652
653
654 /*
655 * an array used used by the LZ-based encoder to hold the length-distance pairs
656 * found by LZ matchfinder.
657 */
658 struct kite_match {
659 unsigned int len;
660 unsigned int dist;
661 };
662
663 struct kite_matchfinder {
664 /* pointer to buffer with data to be compressed */
665 const u8 *buffer;
666
667 /* indicate the first byte that doesn't contain valid input data */
668 const u8 *end;
669
670 /* LZ matchfinder hash chain representation */
671 u32 *hash, *chain;
672
673 u32 base;
674
675 /* indicate the next byte to run through the match finder */
676 u32 offset;
677
678 u32 cyclic_pos;
679
680 /* maximum length of a match that the matchfinder will try to find. */
681 u16 nice_len;
682
683 /* the total sliding window size */
684 u16 wsiz;
685
686 /* how many rounds a matchfinder searches on a hash chain for */
687 u16 depth;
688
689 /* do not perform lazy search no less than this match length */
690 u16 max_lazy;
691
692 /* reduce lazy search no less than this match length */
693 u8 good_len;
694
695 /* current match for lazy matching */
696 struct kite_match *matches;
697 struct kite_match matches_matrix[2][4];
698 };
699
700 /*
701 * This mysterious table is just the CRC of each possible byte. It can be
702 * computed using the standard bit-at-a-time methods. The polynomial can
703 * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
704 * Add the implicit x^16, and you have the standard CRC-CCITT.
705 */
706 u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
707 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
708 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
709 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
710 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
711 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
712 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
713 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
714 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
715 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
716 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
717 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
718 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
719 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
720 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
721 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
722 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
723 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
724 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
725 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
726 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
727 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
728 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
729 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
730 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
731 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
732 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
733 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
734 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
735 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
736 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
737 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
738 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
739 };
740
kite_mf_getmatches_hc3(struct kite_matchfinder * mf,u16 depth,u16 bestlen)741 int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
742 {
743 const u8 *cur = mf->buffer + mf->offset;
744 const u8 *qbase = mf->buffer - mf->base;
745 u32 curMatch;
746 unsigned int v, hv, i, k, p, wsiz;
747
748 if (mf->end - cur < bestlen + 1)
749 return -1;
750
751 v = get_unaligned((u16 *)cur);
752 hv = v ^ crc_ccitt_table[cur[2]];
753 curMatch = mf->hash[hv];
754 p = mf->base + mf->offset;
755 mf->hash[hv] = p;
756 mf->chain[mf->cyclic_pos] = curMatch;
757 wsiz = mf->wsiz;
758 k = 1;
759
760 if (depth) {
761 unsigned int wpos = wsiz + mf->cyclic_pos;
762
763 hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
764 DBG_BUGON(hv > kMatchMaxLen32);
765 do {
766 unsigned int diff = p - curMatch;
767 const u8 *q;
768
769 if (diff >= wsiz)
770 break;
771
772 q = qbase + curMatch;
773 curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
774 if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
775 get_unaligned((u16 *)(cur + bestlen - 1)) ==
776 get_unaligned((u16 *)(q + bestlen - 1)) &&
777 !memcmp(cur + 3, q + 3, bestlen - 3)))) {
778 DBG_BUGON(cur[2] != q[2]);
779 i = erofs_memcmp2(cur + bestlen + 1,
780 q + bestlen + 1, hv - bestlen - 1);
781 bestlen += 1 + i;
782
783 k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
784 mf->matches[k++] = (struct kite_match) {
785 .len = bestlen,
786 .dist = diff,
787 };
788 if (bestlen >= hv)
789 break;
790 }
791 } while (--depth);
792 }
793 mf->offset++;
794 mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
795 return k - 1;
796 }
797
kite_mf_hc3_skip(struct kite_matchfinder * mf)798 static void kite_mf_hc3_skip(struct kite_matchfinder *mf)
799 {
800 if (kite_mf_getmatches_hc3(mf, 0, 2) >= 0)
801 return;
802 mf->offset++;
803 /* mf->cyclic_pos = (mf->cyclic_pos + 1) & (mf->wsiz - 1); */
804 }
805
806 /* let's align with zlib */
807 static const struct kite_matchfinder_cfg {
808 u16 good_length; /* reduce lazy search above this match length */
809 u16 max_lazy; /* do not perform lazy search above this match length */
810 u16 nice_length; /* quit search above this match length */
811 u16 depth;
812 bool lazy_search;
813 } kite_mfcfg[10] = {
814 /* good lazy nice depth */
815 /* 0 */ {0, 0, 0, 0, false}, /* store only [unsupported] */
816 /* 1 */ {4, 4, 8, 4, false}, /* maximum speed, no lazy matches */
817 /* 2 */ {4, 5, 16, 8, false},
818 /* 3 */ {4, 6, 32, 32, false},
819
820 /* 4 */ {4, 4, 16, 16, true}, /* lazy matches */
821 /* 5 */ {8, 16, 32, 32, true},
822 /* 6 */ {8, 16, 128, 128, true},
823 /* 7 */ {8, 32, 128, 256, true},
824 /* 8 */ {32, 128, 258, 1024, true},
825 /* 9 */ {32, 258, 258, 4096, true}, /* maximum compression */
826 };
827
kite_mf_init(struct kite_matchfinder * mf,unsigned int wsiz,int level)828 static int kite_mf_init(struct kite_matchfinder *mf, unsigned int wsiz,
829 int level)
830 {
831 const struct kite_matchfinder_cfg *cfg;
832
833 if (!level || level >= ARRAY_SIZE(kite_mfcfg))
834 return -EINVAL;
835 cfg = &kite_mfcfg[level];
836
837 if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
838 return -EINVAL;
839
840 mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
841 if (!mf->hash)
842 return -ENOMEM;
843
844 mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
845 if (!mf->chain) {
846 free(mf->hash);
847 mf->hash = NULL;
848 return -ENOMEM;
849 }
850 mf->wsiz = wsiz;
851
852 mf->good_len = cfg->good_length;
853 mf->nice_len = cfg->nice_length;
854 mf->depth = cfg->depth;
855 mf->max_lazy = cfg->max_lazy;
856 return cfg->lazy_search;
857 }
858
kite_mf_reset(struct kite_matchfinder * mf,const void * buffer,const void * end)859 static void kite_mf_reset(struct kite_matchfinder *mf,
860 const void *buffer, const void *end)
861 {
862 mf->buffer = buffer;
863 mf->end = end;
864
865 /*
866 * Set the initial value as max_distance + 1. This would avoid hash
867 * zero initialization.
868 */
869 mf->base += mf->offset + kHistorySize32 + 1;
870
871 /*
872 * Unlike other LZ encoders like liblzma [1], we simply reset the hash
873 * chain instead of normalization. This avoids extra complexity, as we
874 * don't consider extreme large input buffers in one go.
875 *
876 * [1] https://github.com/tukaani-project/xz/blob/v5.4.0/src/liblzma/lz/lz_encoder_mf.c#L94
877 */
878 if (__erofs_unlikely(mf->base > ((typeof(mf->base))-1) >> 1)) {
879 mf->base = kHistorySize32 + 1;
880 memset(mf->hash, 0, 0x10000 * sizeof(mf->hash[0]));
881 }
882 mf->offset = 0;
883 mf->cyclic_pos = 0;
884
885 mf->matches = mf->matches_matrix[0];
886 mf->matches_matrix[0][0].len =
887 mf->matches_matrix[1][0].len = kMatchMinLen - 1;
888 }
889
deflate_count_code(struct kite_deflate * s,bool literal,unsigned int lenSlot,unsigned int distSlot)890 static bool deflate_count_code(struct kite_deflate *s, bool literal,
891 unsigned int lenSlot, unsigned int distSlot)
892 {
893 struct kite_deflate_table *t = s->tab;
894 unsigned int lenbase = (literal ? 0 : kSymbolMatch);
895 u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
896 bool recalc = false;
897 unsigned int bits;
898
899 s->freq_changed = true;
900 ++s->mainFreqs[lenbase + lenSlot];
901 if (!literal)
902 ++s->distFreqs[distSlot];
903
904 if (s->encode_mode == 1) {
905 if (literal) {
906 bits = kstaticHuff_litLenLevels[lenSlot];
907 goto out;
908 }
909 bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
910 kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
911 goto out;
912 }
913
914 /* XXX: more ideas to be done later */
915 recalc |= (!literal && !t->distLevels[distSlot]);
916 recalc |= !t->litLenLevels[lenbase + lenSlot];
917 if (recalc) {
918 kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
919 lenSlot, distSlot);
920 s->tab = s->tables + (s->tab == s->tables);
921 kite_deflate_fixdynblock(s);
922 bits = 0;
923 goto out;
924 }
925
926 if (literal) {
927 bits = t->litLenLevels[lenSlot];
928 goto out;
929 }
930
931 bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
932 t->litLenLevels[kSymbolMatch + lenSlot] +
933 kLenExtraBits32[lenSlot];
934 out:
935 if (rem < s->costbits + bits) {
936 --s->mainFreqs[lenbase + lenSlot];
937 if (!literal)
938 --s->distFreqs[distSlot];
939 if (recalc)
940 s->tab = s->tables + (s->tab == s->tables);
941 return false;
942 }
943 s->costbits += bits;
944 return true;
945 }
946
kite_deflate_tally(struct kite_deflate * s,struct kite_match * match)947 static bool kite_deflate_tally(struct kite_deflate *s,
948 struct kite_match *match)
949 {
950 struct kite_deflate_symbol *sym = s->sym + s->symbols;
951 u32 fixedcost = ~0;
952 bool hassp;
953
954 *sym = (struct kite_deflate_symbol) {
955 .len = match->len,
956 .dist = match->dist,
957 };
958
959 retry:
960 if (sym->len < kMatchMinLen) {
961 hassp = deflate_count_code(s, true, sym->dist, 0);
962 } else {
963 unsigned int lc = sym->len - kMatchMinLen;
964 unsigned int lenSlot = g_LenSlots[lc];
965 unsigned int distSlot = deflateDistSlot(sym->dist - 1);
966
967 hassp = deflate_count_code(s, false, lenSlot, distSlot);
968 }
969
970 if (!hassp) {
971 if (s->encode_mode == 1) {
972 fixedcost = s->costbits;
973 s->encode_mode = 2;
974 goto retry;
975 }
976 s->lastblock = true;
977 if (fixedcost <= s->costbits)
978 s->encode_mode = 1;
979 return true;
980 }
981 ++s->symbols;
982 return false;
983 }
984
kite_deflate_writestore(struct kite_deflate * s)985 static void kite_deflate_writestore(struct kite_deflate *s)
986 {
987 bool fb = !s->startpos && !s->bitpos;
988 unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
989
990 do {
991 unsigned int len = min_t(unsigned int, totalsiz, 65535);
992
993 totalsiz -= len;
994 writebits(s, (fb << 3) | (kStored << 1) |
995 (s->lastblock && !totalsiz), 3 + fb);
996 flushbits(s);
997 writebits(s, len, 16);
998 writebits(s, len ^ 0xffff, 16);
999 flushbits(s);
1000 memcpy(s->out + s->pos_out, s->in + s->startpos, len);
1001 s->pos_out += len;
1002 s->startpos += len;
1003 } while (totalsiz);
1004 }
1005
kite_deflate_endblock(struct kite_deflate * s)1006 static void kite_deflate_endblock(struct kite_deflate *s)
1007 {
1008 if (s->encode_mode == 1) {
1009 u32 fixedcost = s->costbits;
1010 unsigned int storelen, storeblocks, storecost;
1011
1012 kite_deflate_fixdynblock(s);
1013 if (fixedcost > s->costbits)
1014 s->encode_mode = 2;
1015 else
1016 s->costbits = fixedcost;
1017
1018 storelen = s->pos_in - s->prev_valid - s->startpos;
1019 storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
1020 storecost = (8 - s->bitpos) + storeblocks - 1 +
1021 storeblocks * 32 + storelen * 8;
1022 if (s->costbits > storecost) {
1023 s->costbits = storecost;
1024 s->encode_mode = 0;
1025 }
1026 }
1027
1028 s->lastblock |= (s->costbits + s->bitpos >=
1029 (s->outlen - s->pos_out) * 8);
1030 }
1031
kite_deflate_startblock(struct kite_deflate * s)1032 static void kite_deflate_startblock(struct kite_deflate *s)
1033 {
1034 memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
1035 memset(s->distFreqs, 0, sizeof(s->distFreqs));
1036 memset(s->tables, 0, sizeof(s->tables[0]));
1037 s->symbols = 0;
1038 s->mainFreqs[kSymbolEndOfBlock]++;
1039 s->encode_mode = 1;
1040 s->tab = s->tables;
1041 s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
1042 }
1043
kite_deflate_commitblock(struct kite_deflate * s)1044 static bool kite_deflate_commitblock(struct kite_deflate *s)
1045 {
1046 if (s->encode_mode == 1) {
1047 kite_deflate_setfixedtrees(s);
1048 kite_deflate_writeblock(s, true);
1049 } else if (s->encode_mode == 2) {
1050 kite_deflate_sendtrees(s);
1051 kite_deflate_writeblock(s, false);
1052 } else {
1053 kite_deflate_writestore(s);
1054 }
1055 s->startpos = s->pos_in - s->prev_valid;
1056 return s->lastblock;
1057 }
1058
kite_deflate_fast(struct kite_deflate * s)1059 static bool kite_deflate_fast(struct kite_deflate *s)
1060 {
1061 struct kite_matchfinder *mf = s->mf;
1062
1063 kite_deflate_startblock(s);
1064 while (1) {
1065 int matches = kite_mf_getmatches_hc3(mf, mf->depth,
1066 kMatchMinLen - 1);
1067
1068 if (matches > 0) {
1069 unsigned int len = mf->matches[matches].len;
1070 unsigned int dist = mf->matches[matches].dist;
1071
1072 if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
1073 goto nomatch;
1074
1075 kite_dbg("%u matches found: longest [%u,%u] of distance %u",
1076 matches, s->pos_in, s->pos_in + len - 1, dist);
1077
1078 if (kite_deflate_tally(s, mf->matches + matches))
1079 break;
1080 s->pos_in += len;
1081 /* skip the rest bytes */
1082 while (--len)
1083 kite_mf_hc3_skip(mf);
1084 } else {
1085 nomatch:
1086 mf->matches[0].dist = s->in[s->pos_in];
1087 if (isprint(s->in[s->pos_in]))
1088 kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
1089 else
1090 kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
1091
1092 if (kite_deflate_tally(s, mf->matches))
1093 break;
1094 ++s->pos_in;
1095 }
1096
1097 s->lastblock |= (s->pos_in >= s->inlen);
1098 if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
1099 kite_deflate_endblock(s);
1100 break;
1101 }
1102 }
1103 return kite_deflate_commitblock(s);
1104 }
1105
kite_deflate_slow(struct kite_deflate * s)1106 static bool kite_deflate_slow(struct kite_deflate *s)
1107 {
1108 struct kite_matchfinder *mf = s->mf;
1109 bool flush = false;
1110
1111 kite_deflate_startblock(s);
1112 while (1) {
1113 struct kite_match *prev_matches = mf->matches;
1114 unsigned int len = kMatchMinLen - 1;
1115 int matches;
1116 unsigned int len0;
1117
1118 mf->matches = mf->matches_matrix[
1119 mf->matches == mf->matches_matrix[0]];
1120 mf->matches[0].dist = s->in[s->pos_in];
1121
1122 len0 = prev_matches[s->prev_longest].len;
1123 if (len0 < mf->max_lazy) {
1124 matches = kite_mf_getmatches_hc3(mf, mf->depth >>
1125 (len0 >= mf->good_len), len0);
1126 if (matches > 0) {
1127 len = mf->matches[matches].len;
1128 if (len == kMatchMinLen &&
1129 mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
1130 matches = 0;
1131 len = kMatchMinLen - 1;
1132 }
1133 } else {
1134 matches = 0;
1135 }
1136 } else {
1137 matches = 0;
1138 kite_mf_hc3_skip(mf);
1139 }
1140
1141 if (len < len0) {
1142 if (kite_deflate_tally(s,
1143 prev_matches + s->prev_longest))
1144 break;
1145
1146 s->pos_in += --len0;
1147 /* skip the rest bytes */
1148 while (--len0)
1149 kite_mf_hc3_skip(mf);
1150 s->prev_valid = false;
1151 s->prev_longest = 0;
1152 } else {
1153 if (!s->prev_valid)
1154 s->prev_valid = true;
1155 else if (kite_deflate_tally(s, prev_matches))
1156 break;
1157 ++s->pos_in;
1158 s->prev_longest = matches;
1159 }
1160
1161 s->lastblock |= (s->pos_in >= s->inlen);
1162 if (s->pos_in >= s->inlen) {
1163 flush = true;
1164 break;
1165 }
1166 if (s->symbols >= s->max_symbols) {
1167 kite_deflate_endblock(s);
1168 break;
1169 }
1170 }
1171
1172 if (flush && s->prev_valid) {
1173 (void)kite_deflate_tally(s, mf->matches + s->prev_longest);
1174 s->prev_valid = false;
1175 }
1176 return kite_deflate_commitblock(s);
1177 }
1178
kite_deflate_end(struct kite_deflate * s)1179 void kite_deflate_end(struct kite_deflate *s)
1180 {
1181 if (s->mf) {
1182 if (s->mf->hash)
1183 free(s->mf->hash);
1184 if (s->mf->chain)
1185 free(s->mf->chain);
1186 free(s->mf);
1187 }
1188 if (s->sym)
1189 free(s->sym);
1190 free(s);
1191 }
1192
kite_deflate_init(int level,unsigned int dict_size)1193 struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
1194 {
1195 struct kite_deflate *s;
1196 int err;
1197
1198 kite_deflate_init_once();
1199 s = calloc(1, sizeof(*s));
1200 if (!s)
1201 return ERR_PTR(-ENOMEM);
1202
1203 s->max_symbols = 16384;
1204 s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
1205 if (!s->sym) {
1206 err = -ENOMEM;
1207 goto err_out;
1208 }
1209
1210 s->mf = malloc(sizeof(*s->mf));
1211 if (!s->mf) {
1212 err = -ENOMEM;
1213 goto err_out;
1214 }
1215
1216 if (!dict_size)
1217 dict_size = kHistorySize32;
1218
1219 err = kite_mf_init(s->mf, dict_size, level);
1220 if (err < 0)
1221 goto err_out;
1222
1223 s->lazy_search = err;
1224 return s;
1225 err_out:
1226 if (s->mf)
1227 free(s->mf);
1228 if (s->sym)
1229 free(s->sym);
1230 free(s);
1231 return ERR_PTR(err);
1232 }
1233
kite_deflate_destsize(struct kite_deflate * s,const u8 * in,u8 * out,unsigned int * srcsize,unsigned int target_dstsize)1234 int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
1235 unsigned int *srcsize, unsigned int target_dstsize)
1236 {
1237 memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
1238 s->in = in;
1239 s->inlen = *srcsize;
1240 s->out = out;
1241 s->outlen = target_dstsize;
1242 kite_mf_reset(s->mf, in, in + s->inlen);
1243
1244 if (s->lazy_search)
1245 while (!kite_deflate_slow(s));
1246 else
1247 while (!kite_deflate_fast(s));
1248 flushbits(s);
1249
1250 *srcsize = s->startpos;
1251 return s->pos_out;
1252 }
1253
1254 #if TEST
1255 #include <unistd.h>
1256 #include <fcntl.h>
1257 #include <sys/mman.h>
1258
main(int argc,char * argv[])1259 int main(int argc, char *argv[])
1260 {
1261 int fd;
1262 u64 filelength;
1263 u8 out[1048576], *buf;
1264 int dstsize = 4096;
1265 unsigned int srcsize, outsize;
1266 struct kite_deflate *s;
1267
1268 fd = open(argv[1], O_RDONLY);
1269 if (fd < 0)
1270 return -errno;
1271 if (argc > 2)
1272 dstsize = atoi(argv[2]);
1273 filelength = lseek(fd, 0, SEEK_END);
1274
1275 s = kite_deflate_init(9, 0);
1276 if (IS_ERR(s))
1277 return PTR_ERR(s);
1278
1279 filelength = lseek(fd, 0, SEEK_END);
1280 buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
1281 if (buf == MAP_FAILED)
1282 return -errno;
1283 close(fd);
1284
1285 srcsize = filelength;
1286 outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
1287 fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1288 write(fd, out, outsize);
1289 close(fd);
1290 kite_deflate_end(s);
1291 return 0;
1292 }
1293 #endif
1294