xref: /aosp_15_r20/external/erofs-utils/lib/kite_deflate.c (revision 33b1fccf6a0fada2c2875d400ed01119b7676ee5)
1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3  * erofs-utils/lib/kite_deflate.c
4  *
5  * Copyright (C) 2023, Alibaba Cloud
6  * Copyright (C) 2023, Gao Xiang <[email protected]>
7  */
8 #include "erofs/defs.h"
9 #include "erofs/print.h"
10 #include <errno.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include <stdio.h>
15 
16 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
17 			    unsigned long sz);
18 
19 #ifdef TEST
20 #define kite_dbg(x, ...)	fprintf(stderr, x "\n", ##__VA_ARGS__)
21 #else
22 #define kite_dbg(x, ...)
23 #endif
24 
25 #define kHistorySize32		(1U << 15)
26 
27 #define kNumLenSymbols32	256
28 #define kNumLenSymbolsMax	kNumLenSymbols32
29 
30 #define kSymbolEndOfBlock	256
31 #define kSymbolMatch		(kSymbolEndOfBlock + 1)
32 #define kNumLenSlots		29
33 #define kMainTableSize		(kSymbolMatch + kNumLenSlots)
34 
35 #define kFixedLenTableSize	(kSymbolMatch + 31)
36 #define FixedDistTableSize	32
37 
38 #define kMainTableSize		(kSymbolMatch + kNumLenSlots)
39 #define kDistTableSize32	30
40 
41 #define kNumLitLenCodesMin	257
42 #define kNumDistCodesMin	1
43 
44 #define kNumLensCodesMin	4
45 #define kLensTableSize		19
46 
47 #define kMatchMinLen		3
48 #define kMatchMaxLen32		kNumLenSymbols32 + kMatchMinLen - 1
49 
50 #define kTableDirectLevels      16
51 #define kBitLensRepNumber_3_6   kTableDirectLevels
52 #define kBitLens0Number_3_10    (kBitLensRepNumber_3_6 + 1)
53 #define kBitLens0Number_11_138  (kBitLens0Number_3_10 + 1)
54 
55 static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
56 static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
57 	[0   ... 143] = 8, [144 ... 255] = 9,
58 	[256 ... 279] = 7, [280 ... 287] = 8,
59 };
60 static u32 kstaticHuff_distCodes[kFixedLenTableSize];
61 
62 const u8 kLenStart32[kNumLenSlots] =
63 	{0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
64 
65 const u8 kLenExtraBits32[kNumLenSlots] =
66 	{0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,
67 	 5,  5,  5, 0};
68 
69 /* First normalized distance for each code (0 = distance of 1) */
70 const u32 kDistStart[kDistTableSize32] =
71 	{0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
72 	 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
73 
74 /* extra bits for each distance code */
75 const u8 kDistExtraBits[kDistTableSize32] =
76 	{0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
77 
78 const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
79 	{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
80 
81 const u8 kLevelExtraBits[3] = {2, 3, 7};
82 
83 #define kStored			0
84 #define kFixedHuffman		1
85 #define kDynamicHuffman		2
86 
87 struct kite_deflate_symbol {
88 	u16 len, dist;
89 };
90 
91 struct kite_deflate_table {
92 	u32  mainCodes[kMainTableSize];
93 	u8   litLenLevels[kMainTableSize];
94 	u32  distCodes[kDistTableSize32];
95 	u8   distLevels[kDistTableSize32];
96 	u32  levelCodes[kLensTableSize];
97 	u8   levelLens[kLensTableSize];
98 
99 	u8   numdistlens, numblcodes;
100 	u16  numlitlens;
101 };
102 
103 struct kite_deflate {
104 	struct kite_deflate_table  *tab;
105 	const u8   *in;
106 	u8         *out;
107 
108 	u32  inlen, outlen;
109 	u32  pos_in, pos_out;
110 	u32  inflightbits;
111 	u8   bitpos;
112 	u8   numHuffBits;
113 	u32  symbols;
114 
115 	u32  costbits, startpos;
116 	u8   encode_mode;
117 	bool freq_changed, lastblock;
118 
119 	/* Previous match for lazy matching */
120 	bool prev_valid;
121 	u16 prev_longest;
122 
123 	u32  mainFreqs[kMainTableSize];
124 	u32  distFreqs[kDistTableSize32];
125 	struct kite_deflate_table tables[2];
126 
127 	/* don't reset the following fields */
128 	struct kite_matchfinder *mf;
129 	struct kite_deflate_symbol *sym;
130 	u32 max_symbols;
131 	bool lazy_search;
132 };
133 
134 #define ZLIB_DISTANCE_TOO_FAR	4096
135 
136 static u8 g_LenSlots[kNumLenSymbolsMax];
137 
138 #define kNumLogBits 9		// do not change it
139 static u8 g_FastPos[1 << kNumLogBits];
140 
writebits(struct kite_deflate * s,unsigned int v,u8 bits)141 static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
142 {
143 	unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
144 
145 	s->inflightbits |= (v << s->bitpos) & (!rem - 1);
146 	if (bits > rem) {
147 		u8 *out = s->out + s->pos_out;
148 
149 		out[0] = s->inflightbits & 0xff;
150 		out[1] = (s->inflightbits >> 8) & 0xff;
151 		out[2] = (s->inflightbits >> 16) & 0xff;
152 		out[3] = (s->inflightbits >> 24) & 0xff;
153 		s->pos_out += 4;
154 		DBG_BUGON(s->pos_out > s->outlen);
155 		s->inflightbits = v >> rem;
156 		s->bitpos = bits - rem;
157 		return;
158 	}
159 	s->bitpos += bits;
160 }
161 
flushbits(struct kite_deflate * s)162 static void flushbits(struct kite_deflate *s)
163 {
164 	u8 *out = s->out + s->pos_out;
165 
166 	if (!s->bitpos)
167 		return;
168 	out[0] = s->inflightbits & 0xff;
169 	if (s->bitpos >= 8) {
170 		out[1] = (s->inflightbits >> 8) & 0xff;
171 		if (s->bitpos >= 16) {
172 			out[2] = (s->inflightbits >> 16) & 0xff;
173 			if (s->bitpos >= 24)
174 				out[3] = (s->inflightbits >> 24) & 0xff;
175 		}
176 	}
177 	s->pos_out += round_up(s->bitpos, 8) >> 3;
178 	DBG_BUGON(s->pos_out > s->outlen);
179 	s->bitpos = 0;
180 	s->inflightbits = 0;
181 }
182 
183 #define kMaxLen 16
184 
deflate_genhuffcodes(const u8 * lens,u32 * p,unsigned int nr_codes,const u32 * bl_count)185 static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
186 				 const u32 *bl_count)
187 {
188 	u32 nextCodes[kMaxLen + 1];	/* next code value for each bit length */
189 	unsigned int code = 0;		/* running code value */
190 	unsigned int bits, k;
191 
192 	for (bits = 1; bits <= kMaxLen; ++bits) {
193 		code = (code + bl_count[bits - 1]) << 1;
194 		nextCodes[bits] = code;
195 	}
196 
197 	DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
198 
199 	for (k = 0; k < nr_codes; ++k)
200 		p[k] = nextCodes[lens[k]]++;
201 }
202 
deflate_reversebits_one(u32 code,u8 bits)203 static u32 deflate_reversebits_one(u32 code, u8 bits)
204 {
205 	unsigned int x = code;
206 
207 	x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
208 	x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
209 	x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
210 
211 	return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
212 }
213 
Huffman_ReverseBits(u32 * codes,const u8 * lens,unsigned int n)214 static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
215 {
216 	while (n) {
217 		u32 code = *codes;
218 
219 		*codes++ = deflate_reversebits_one(code, *lens++);
220 		--n;
221 	}
222 }
223 
kite_deflate_init_once(void)224 static void kite_deflate_init_once(void)
225 {
226 	static const u32 static_bl_count[kMaxLen + 1] = {
227 		[7] = 279 - 256 + 1,
228 		[8] = (143 + 1) + (287 - 280 + 1),
229 		[9] = 255 - 144 + 1,
230 	};
231 	unsigned int i, c, j, k;
232 
233 	if (kstaticHuff_distCodes[31])
234 		return;
235 	deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
236 			     kFixedLenTableSize, static_bl_count);
237 	Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
238 			    kFixedLenTableSize);
239 
240 	for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
241 		kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
242 
243 	for (i = 0; i < kNumLenSlots; i++) {
244 		c = kLenStart32[i];
245 		j = 1 << kLenExtraBits32[i];
246 
247 		for (k = 0; k < j; k++, c++)
248 			g_LenSlots[c] = (u8)i;
249 	}
250 
251 	c = 0;
252 	for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
253 		k = 1 << kDistExtraBits[i];
254 		for (j = 0; j < k; j++)
255 			g_FastPos[c++] = i;
256 	}
257 }
258 
kite_deflate_scanlens(unsigned int numlens,u8 * lens,u32 * freqs)259 static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
260 {
261 	int n;				/* iterates over all tree elements */
262 	int prevlen = -1;		/* last emitted length */
263 	int curlen;			/* length of current code */
264 	int nextlen = lens[0];		/* length of next code */
265 	int count = 0;			/* repeat count of the current code */
266 	int max_count = 7;		/* max repeat count */
267 	int min_count = 4;		/* min repeat count */
268 
269 	if (!nextlen)
270 		max_count = 138, min_count = 3;
271 
272 	for (n = 0; n < numlens; n++) {
273 		curlen = nextlen;
274 		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
275 		++count;
276 
277 		if (count < max_count && curlen == nextlen)
278 			continue;
279 		if (count < min_count) {
280 			freqs[curlen] += count;
281 		} else if (curlen != 0) {
282 			if (curlen != prevlen)
283 				freqs[curlen]++;
284 			freqs[kBitLensRepNumber_3_6]++;
285 		} else if (count <= 10) {
286 			freqs[kBitLens0Number_3_10]++;
287 		} else {
288 			freqs[kBitLens0Number_11_138]++;
289 		}
290 
291 		count = 0;
292 		prevlen = curlen;
293 		if (!nextlen)
294 			max_count = 138, min_count = 3;
295 		else if (curlen == nextlen)
296 			max_count = 6, min_count = 3;
297 		else
298 			max_count = 7, min_count = 4;
299 	}
300 }
301 
kite_deflate_sendtree(struct kite_deflate * s,const u8 * lens,unsigned int numlens)302 static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
303 				  unsigned int numlens)
304 {
305 	int n;				/* iterates over all tree elements */
306 	int prevlen = -1;		/* last emitted length */
307 	int curlen;			/* length of current code */
308 	int nextlen = lens[0];		/* length of next code */
309 	int count = 0;			/* repeat count of the current code */
310 	int max_count = 7;		/* max repeat count */
311 	int min_count = 4;		/* min repeat count */
312 	const u8 *bl_lens = s->tab->levelLens;
313 	const u32 *bl_codes = s->tab->levelCodes;
314 
315 	if (!nextlen)
316 		max_count = 138, min_count = 3;
317 
318 	for (n = 0; n < numlens; n++) {
319 		curlen = nextlen;
320 		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
321 		++count;
322 
323 		if (count < max_count && curlen == nextlen)
324 			continue;
325 		if (count < min_count) {
326 			do {
327 				writebits(s, bl_codes[curlen], bl_lens[curlen]);
328 			} while (--count);
329 		} else if (curlen) {
330 			if (curlen != prevlen) {
331 				writebits(s, bl_codes[curlen], bl_lens[curlen]);
332 				count--;
333 			}
334 			writebits(s, bl_codes[kBitLensRepNumber_3_6],
335 				  bl_lens[kBitLensRepNumber_3_6]);
336 			writebits(s, count - 3, 2);
337 		} else if (count <= 10) {
338 			writebits(s, bl_codes[kBitLens0Number_3_10],
339 				  bl_lens[kBitLens0Number_3_10]);
340 			writebits(s, count - 3, 3);
341 		} else {
342 			writebits(s, bl_codes[kBitLens0Number_11_138],
343 				  bl_lens[kBitLens0Number_11_138]);
344 			writebits(s, count - 11, 7);
345 		}
346 
347 		count = 0;
348 		prevlen = curlen;
349 		if (!nextlen)
350 			max_count = 138, min_count = 3;
351 		else if (curlen == nextlen)
352 			max_count = 6, min_count = 3;
353 		else
354 			max_count = 7, min_count = 4;
355 	}
356 }
357 
kite_deflate_setfixedtrees(struct kite_deflate * s)358 static void kite_deflate_setfixedtrees(struct kite_deflate *s)
359 {
360 	writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
361 }
362 
kite_deflate_sendtrees(struct kite_deflate * s)363 static void kite_deflate_sendtrees(struct kite_deflate *s)
364 {
365 	struct kite_deflate_table *t = s->tab;
366 	unsigned int i;
367 
368 	writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
369 	writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
370 	writebits(s, t->numdistlens - kNumDistCodesMin,  5);
371 	writebits(s, t->numblcodes - kNumLensCodesMin,   4);
372 
373 	for (i = 0; i < t->numblcodes; i++)
374 		writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
375 
376 	Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
377 	kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
378 	kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
379 }
380 
deflateDistSlot(unsigned int pos)381 static inline unsigned int deflateDistSlot(unsigned int pos)
382 {
383 	const unsigned int zz = (kNumLogBits - 1) &
384 		((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
385 
386 	return g_FastPos[pos >> zz] + (zz * 2);
387 }
388 
kite_deflate_writeblock(struct kite_deflate * s,bool fixed)389 static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
390 {
391 	int i;
392 	u32 *mainCodes, *distCodes;
393 	const u8 *litLenLevels, *distLevels;
394 
395 	if (!fixed) {
396 		struct kite_deflate_table *t = s->tab;
397 
398 		mainCodes = t->mainCodes; distCodes = t->distCodes;
399 		litLenLevels = t->litLenLevels;	distLevels = t->distLevels;
400 
401 		Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
402 		Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
403 	} else {
404 		mainCodes = kstaticHuff_mainCodes;
405 		distCodes = kstaticHuff_distCodes;
406 
407 		litLenLevels = kstaticHuff_litLenLevels;
408 		distLevels = NULL;
409 	}
410 
411 	for (i = 0; i < s->symbols; ++i) {
412 		struct kite_deflate_symbol *sym = &s->sym[i];
413 
414 		if (sym->len < kMatchMinLen) {		/* literal */
415 			writebits(s, mainCodes[sym->dist],
416 				  litLenLevels[sym->dist]);
417 		} else {
418 			unsigned int lenSlot, distSlot;
419 			unsigned int lc = sym->len - kMatchMinLen;
420 
421 			lenSlot = g_LenSlots[lc];
422 			writebits(s, mainCodes[kSymbolMatch + lenSlot],
423 				  litLenLevels[kSymbolMatch + lenSlot]);
424 			writebits(s, lc - kLenStart32[lenSlot],
425 				  kLenExtraBits32[lenSlot]);
426 
427 			distSlot = deflateDistSlot(sym->dist - 1);
428 			writebits(s, distCodes[distSlot],
429 				  fixed ? 5 : distLevels[distSlot]);
430 			writebits(s, sym->dist - 1 - kDistStart[distSlot],
431 				  kDistExtraBits[distSlot]);
432 		}
433 	}
434 	writebits(s, mainCodes[kSymbolEndOfBlock],
435 		  litLenLevels[kSymbolEndOfBlock]);
436 }
437 
Huffman_GetPrice(const u32 * freqs,const u8 * lens,u32 num)438 static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
439 {
440 	u32 price = 0;
441 
442 	while (num) {
443 		price += (*lens++) * (*freqs++);
444 		--num;
445 	}
446 	return price;
447 }
448 
Huffman_GetPriceEx(const u32 * freqs,const u8 * lens,u32 num,const u8 * extraBits,u32 extraBase)449 static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
450 			      const u8 *extraBits, u32 extraBase)
451 {
452 	return Huffman_GetPrice(freqs, lens, num) +
453 		Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
454 }
455 
456 /* Adapted from C/HuffEnc.c (7zip) for now */
457 #define HeapSortDown(p, k, size, temp) \
458   { for (;;) { \
459     size_t s = (k << 1); \
460     if (s > size) break; \
461     if (s < size && p[s + 1] > p[s]) s++; \
462     if (temp >= p[s]) break; \
463     p[k] = p[s]; k = s; \
464   } p[k] = temp; }
465 
HeapSort(u32 * p,size_t size)466 static void HeapSort(u32 *p, size_t size)
467 {
468   if (size <= 1)
469     return;
470   p--;
471   {
472     size_t i = size / 2;
473     do
474     {
475       u32 temp = p[i];
476       size_t k = i;
477       HeapSortDown(p, k, size, temp)
478     }
479     while (--i != 0);
480   }
481   /*
482   do
483   {
484     size_t k = 1;
485     UInt32 temp = p[size];
486     p[size--] = p[1];
487     HeapSortDown(p, k, size, temp)
488   }
489   while (size > 1);
490   */
491   while (size > 3)
492   {
493     u32 temp = p[size];
494     size_t k = (p[3] > p[2]) ? 3 : 2;
495     p[size--] = p[1];
496     p[1] = p[k];
497     HeapSortDown(p, k, size, temp)
498   }
499   {
500     u32 temp = p[size];
501     p[size] = p[1];
502     if (size > 2 && p[2] < temp)
503     {
504       p[1] = p[2];
505       p[2] = temp;
506     }
507     else
508       p[1] = temp;
509   }
510 }
511 
512 #define NUM_BITS 10
513 #define MASK (((unsigned)1 << NUM_BITS) - 1)
514 
Huffman_Generate(const u32 * freqs,u32 * p,u8 * lens,unsigned int numSymbols,unsigned int maxLen)515 static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
516 			     unsigned int numSymbols, unsigned int maxLen)
517 {
518 	u32 num, i;
519 
520 	num = 0;
521 	/* if (maxLen > 10) maxLen = 10; */
522 
523 	for (i = 0; i < numSymbols; i++) {
524 		u32 freq = freqs[i];
525 
526 		if (!freq)
527 			lens[i] = 0;
528 		else
529 			p[num++] = i | (freq << NUM_BITS);
530 	}
531 	HeapSort(p, num);
532 
533 	if (num < 2) {
534 		unsigned int minCode = 0, maxCode = 1;
535 
536 		if (num == 1) {
537 			maxCode = (unsigned int)p[0] & MASK;
538 			if (!maxCode)
539 				maxCode++;
540 		}
541 		p[minCode] = 0;
542 		p[maxCode] = 1;
543 		lens[minCode] = lens[maxCode] = 1;
544 		return;
545 	}
546 
547 	{
548 		u32 b, e, i;
549 
550 		i = b = e = 0;
551 		do {
552 			u32 n, m, freq;
553 
554 			n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
555 			freq = (p[n] & ~MASK);
556 			p[n] = (p[n] & MASK) | (e << NUM_BITS);
557 			m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
558 			freq += (p[m] & ~MASK);
559 			p[m] = (p[m] & MASK) | (e << NUM_BITS);
560 			p[e] = (p[e] & MASK) | freq;
561 			e++;
562 		} while (num - e > 1);
563 
564 		{
565 			u32 lenCounters[kMaxLen + 1];
566 
567 			for (i = 0; i <= kMaxLen; i++)
568 				lenCounters[i] = 0;
569 
570 			p[--e] &= MASK;
571 			lenCounters[1] = 2;
572 			while (e > 0) {
573 				u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
574 
575 				p[e] = (p[e] & MASK) | (len << NUM_BITS);
576 				if (len >= maxLen)
577 					for (len = maxLen - 1; lenCounters[len] == 0; len--);
578 				lenCounters[len]--;
579 				lenCounters[(size_t)len + 1] += 2;
580 			}
581 
582 			{
583 				u32 len;
584 
585 				i = 0;
586 				for (len = maxLen; len != 0; len--) {
587 					u32 k;
588 					for (k = lenCounters[len]; k != 0; k--)
589 						lens[p[i++] & MASK] = (u8)len;
590 				}
591 			}
592 			deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
593 		}
594 	}
595 }
596 
kite_deflate_fixdynblock(struct kite_deflate * s)597 static void kite_deflate_fixdynblock(struct kite_deflate *s)
598 {
599 	struct kite_deflate_table *t = s->tab;
600 	unsigned int numlitlens, numdistlens, numblcodes;
601 	u32 levelFreqs[kLensTableSize] = {0};
602 	u32 opt_mainlen;
603 
604 	if (!s->freq_changed)
605 		return;
606 
607 	/* in order to match zlib */
608 	s->numHuffBits = kMaxLen;
609 //	s->numHuffBits = (s->symbols > 18000 ? 12 :
610 //		(s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
611 
612 	Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
613 			 kMainTableSize, s->numHuffBits);
614 	Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
615 			 kDistTableSize32, s->numHuffBits);
616 
617 	/* code lengths for the literal/length alphabet */
618 	numlitlens = kMainTableSize;
619 	while (numlitlens > kNumLitLenCodesMin &&
620 	       !t->litLenLevels[numlitlens - 1])
621 		--numlitlens;
622 
623 	/* code lengths for the distance alphabet */
624 	numdistlens = kDistTableSize32;
625 	while (numdistlens > kNumDistCodesMin &&
626 	       !t->distLevels[numdistlens - 1])
627 		--numdistlens;
628 
629 	kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
630 	kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
631 	Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
632 			 kLensTableSize, 7);
633 	numblcodes = kLensTableSize;
634 	while (numblcodes > kNumLensCodesMin &&
635 	       !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
636 		--numblcodes;
637 
638 	t->numlitlens = numlitlens;
639 	t->numdistlens = numdistlens;
640 	t->numblcodes = numblcodes;
641 
642 	opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
643 			kMainTableSize, kLenExtraBits32, kSymbolMatch) +
644 		Huffman_GetPriceEx(s->distFreqs, t->distLevels,
645 			kDistTableSize32, kDistExtraBits, 0);
646 	s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
647 		Huffman_GetPriceEx(levelFreqs, t->levelLens,
648 			kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
649 		opt_mainlen;
650 	s->freq_changed = false;
651 }
652 
653 
654 /*
655  * an array used used by the LZ-based encoder to hold the length-distance pairs
656  * found by LZ matchfinder.
657  */
658 struct kite_match {
659 	unsigned int len;
660 	unsigned int dist;
661 };
662 
663 struct kite_matchfinder {
664 	/* pointer to buffer with data to be compressed */
665 	const u8 *buffer;
666 
667 	/* indicate the first byte that doesn't contain valid input data */
668 	const u8 *end;
669 
670 	/* LZ matchfinder hash chain representation */
671 	u32 *hash, *chain;
672 
673 	u32 base;
674 
675 	/* indicate the next byte to run through the match finder */
676 	u32 offset;
677 
678 	u32 cyclic_pos;
679 
680 	/* maximum length of a match that the matchfinder will try to find. */
681 	u16 nice_len;
682 
683 	/* the total sliding window size */
684 	u16 wsiz;
685 
686 	/* how many rounds a matchfinder searches on a hash chain for */
687 	u16 depth;
688 
689 	/* do not perform lazy search no less than this match length */
690 	u16 max_lazy;
691 
692 	/* reduce lazy search no less than this match length */
693 	u8  good_len;
694 
695 	/* current match for lazy matching */
696 	struct kite_match *matches;
697 	struct kite_match matches_matrix[2][4];
698 };
699 
700 /*
701  * This mysterious table is just the CRC of each possible byte. It can be
702  * computed using the standard bit-at-a-time methods. The polynomial can
703  * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
704  * Add the implicit x^16, and you have the standard CRC-CCITT.
705  */
706 u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
707 	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
708 	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
709 	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
710 	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
711 	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
712 	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
713 	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
714 	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
715 	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
716 	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
717 	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
718 	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
719 	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
720 	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
721 	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
722 	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
723 	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
724 	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
725 	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
726 	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
727 	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
728 	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
729 	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
730 	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
731 	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
732 	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
733 	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
734 	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
735 	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
736 	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
737 	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
738 	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
739 };
740 
kite_mf_getmatches_hc3(struct kite_matchfinder * mf,u16 depth,u16 bestlen)741 int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
742 {
743 	const u8 *cur = mf->buffer + mf->offset;
744 	const u8 *qbase = mf->buffer - mf->base;
745 	u32 curMatch;
746 	unsigned int v, hv, i, k, p, wsiz;
747 
748 	if (mf->end - cur < bestlen + 1)
749 		return -1;
750 
751 	v = get_unaligned((u16 *)cur);
752 	hv = v ^ crc_ccitt_table[cur[2]];
753 	curMatch = mf->hash[hv];
754 	p = mf->base + mf->offset;
755 	mf->hash[hv] = p;
756 	mf->chain[mf->cyclic_pos] = curMatch;
757 	wsiz = mf->wsiz;
758 	k = 1;
759 
760 	if (depth) {
761 		unsigned int wpos = wsiz + mf->cyclic_pos;
762 
763 		hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
764 		DBG_BUGON(hv > kMatchMaxLen32);
765 		do {
766 			unsigned int diff = p - curMatch;
767 			const u8 *q;
768 
769 			if (diff >= wsiz)
770 				break;
771 
772 			q = qbase + curMatch;
773 			curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
774 			if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
775 			    get_unaligned((u16 *)(cur + bestlen - 1)) ==
776 			    get_unaligned((u16 *)(q + bestlen - 1)) &&
777 			    !memcmp(cur + 3, q + 3, bestlen - 3)))) {
778 				DBG_BUGON(cur[2] != q[2]);
779 				i = erofs_memcmp2(cur + bestlen + 1,
780 					q + bestlen + 1, hv - bestlen - 1);
781 				bestlen += 1 + i;
782 
783 				k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
784 				mf->matches[k++] = (struct kite_match) {
785 					.len = bestlen,
786 					.dist = diff,
787 				};
788 				if (bestlen >= hv)
789 					break;
790 			}
791 		} while (--depth);
792 	}
793 	mf->offset++;
794 	mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
795 	return k - 1;
796 }
797 
kite_mf_hc3_skip(struct kite_matchfinder * mf)798 static void kite_mf_hc3_skip(struct kite_matchfinder *mf)
799 {
800 	if (kite_mf_getmatches_hc3(mf, 0, 2) >= 0)
801 		return;
802 	mf->offset++;
803 	/* mf->cyclic_pos = (mf->cyclic_pos + 1) & (mf->wsiz - 1); */
804 }
805 
806 /* let's align with zlib */
807 static const struct kite_matchfinder_cfg {
808 	u16  good_length;	/* reduce lazy search above this match length */
809 	u16  max_lazy;	/* do not perform lazy search above this match length */
810 	u16  nice_length;	/* quit search above this match length */
811 	u16  depth;
812 	bool lazy_search;
813 } kite_mfcfg[10] = {
814 /*      good lazy nice depth */
815 /* 0 */ {0,    0,  0,    0, false},	/* store only [unsupported] */
816 /* 1 */ {4,    4,  8,    4, false},	/* maximum speed, no lazy matches */
817 /* 2 */ {4,    5, 16,    8, false},
818 /* 3 */ {4,    6, 32,   32, false},
819 
820 /* 4 */ {4,    4,  16,   16, true},	/* lazy matches */
821 /* 5 */ {8,   16,  32,   32, true},
822 /* 6 */ {8,   16, 128,  128, true},
823 /* 7 */ {8,   32, 128,  256, true},
824 /* 8 */ {32, 128, 258, 1024, true},
825 /* 9 */ {32, 258, 258, 4096, true},	/* maximum compression */
826 };
827 
kite_mf_init(struct kite_matchfinder * mf,unsigned int wsiz,int level)828 static int kite_mf_init(struct kite_matchfinder *mf, unsigned int wsiz,
829 			int level)
830 {
831 	const struct kite_matchfinder_cfg *cfg;
832 
833 	if (!level || level >= ARRAY_SIZE(kite_mfcfg))
834 		return -EINVAL;
835 	cfg = &kite_mfcfg[level];
836 
837 	if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
838 		return -EINVAL;
839 
840 	mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
841 	if (!mf->hash)
842 		return -ENOMEM;
843 
844 	mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
845 	if (!mf->chain) {
846 		free(mf->hash);
847 		mf->hash = NULL;
848 		return -ENOMEM;
849 	}
850 	mf->wsiz = wsiz;
851 
852 	mf->good_len = cfg->good_length;
853 	mf->nice_len = cfg->nice_length;
854 	mf->depth = cfg->depth;
855 	mf->max_lazy = cfg->max_lazy;
856 	return cfg->lazy_search;
857 }
858 
kite_mf_reset(struct kite_matchfinder * mf,const void * buffer,const void * end)859 static void kite_mf_reset(struct kite_matchfinder *mf,
860 			  const void *buffer, const void *end)
861 {
862 	mf->buffer = buffer;
863 	mf->end = end;
864 
865 	/*
866 	 * Set the initial value as max_distance + 1.  This would avoid hash
867 	 * zero initialization.
868 	 */
869 	mf->base += mf->offset + kHistorySize32 + 1;
870 
871 	/*
872 	 * Unlike other LZ encoders like liblzma [1], we simply reset the hash
873 	 * chain instead of normalization.  This avoids extra complexity, as we
874 	 * don't consider extreme large input buffers in one go.
875 	 *
876 	 * [1] https://github.com/tukaani-project/xz/blob/v5.4.0/src/liblzma/lz/lz_encoder_mf.c#L94
877 	 */
878 	if (__erofs_unlikely(mf->base > ((typeof(mf->base))-1) >> 1)) {
879 		mf->base = kHistorySize32 + 1;
880 		memset(mf->hash, 0, 0x10000 * sizeof(mf->hash[0]));
881 	}
882 	mf->offset = 0;
883 	mf->cyclic_pos = 0;
884 
885 	mf->matches = mf->matches_matrix[0];
886 	mf->matches_matrix[0][0].len =
887 		mf->matches_matrix[1][0].len = kMatchMinLen - 1;
888 }
889 
deflate_count_code(struct kite_deflate * s,bool literal,unsigned int lenSlot,unsigned int distSlot)890 static bool deflate_count_code(struct kite_deflate *s, bool literal,
891 			       unsigned int lenSlot, unsigned int distSlot)
892 {
893 	struct kite_deflate_table *t = s->tab;
894 	unsigned int lenbase = (literal ? 0 : kSymbolMatch);
895 	u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
896 	bool recalc = false;
897 	unsigned int bits;
898 
899 	s->freq_changed = true;
900 	++s->mainFreqs[lenbase + lenSlot];
901 	if (!literal)
902 		++s->distFreqs[distSlot];
903 
904 	if (s->encode_mode == 1) {
905 		if (literal) {
906 			bits = kstaticHuff_litLenLevels[lenSlot];
907 			goto out;
908 		}
909 		bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
910 			kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
911 		goto out;
912 	}
913 
914 	/* XXX: more ideas to be done later */
915 	recalc |= (!literal && !t->distLevels[distSlot]);
916 	recalc |= !t->litLenLevels[lenbase + lenSlot];
917 	if (recalc) {
918 		kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
919 			 lenSlot, distSlot);
920 		s->tab = s->tables + (s->tab == s->tables);
921 		kite_deflate_fixdynblock(s);
922 		bits = 0;
923 		goto out;
924 	}
925 
926 	if (literal) {
927 		bits = t->litLenLevels[lenSlot];
928 		goto out;
929 	}
930 
931 	bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
932 		t->litLenLevels[kSymbolMatch + lenSlot] +
933 		kLenExtraBits32[lenSlot];
934 out:
935 	if (rem < s->costbits + bits) {
936 		--s->mainFreqs[lenbase + lenSlot];
937 		if (!literal)
938 			--s->distFreqs[distSlot];
939 		if (recalc)
940 			s->tab = s->tables + (s->tab == s->tables);
941 		return false;
942 	}
943 	s->costbits += bits;
944 	return true;
945 }
946 
kite_deflate_tally(struct kite_deflate * s,struct kite_match * match)947 static bool kite_deflate_tally(struct kite_deflate *s,
948 			       struct kite_match *match)
949 {
950 	struct kite_deflate_symbol *sym = s->sym + s->symbols;
951 	u32 fixedcost = ~0;
952 	bool hassp;
953 
954 	*sym = (struct kite_deflate_symbol) {
955 		.len = match->len,
956 		.dist = match->dist,
957 	};
958 
959 retry:
960 	if (sym->len < kMatchMinLen) {
961 		hassp = deflate_count_code(s, true, sym->dist, 0);
962 	} else {
963 		unsigned int lc = sym->len - kMatchMinLen;
964 		unsigned int lenSlot = g_LenSlots[lc];
965 		unsigned int distSlot = deflateDistSlot(sym->dist - 1);
966 
967 		hassp = deflate_count_code(s, false, lenSlot, distSlot);
968 	}
969 
970 	if (!hassp) {
971 		if (s->encode_mode == 1) {
972 			fixedcost = s->costbits;
973 			s->encode_mode = 2;
974 			goto retry;
975 		}
976 		s->lastblock = true;
977 		if (fixedcost <= s->costbits)
978 			s->encode_mode = 1;
979 		return true;
980 	}
981 	++s->symbols;
982 	return false;
983 }
984 
kite_deflate_writestore(struct kite_deflate * s)985 static void kite_deflate_writestore(struct kite_deflate *s)
986 {
987 	bool fb = !s->startpos && !s->bitpos;
988 	unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
989 
990 	do {
991 		unsigned int len = min_t(unsigned int, totalsiz, 65535);
992 
993 		totalsiz -= len;
994 		writebits(s, (fb << 3) | (kStored << 1) |
995 			  (s->lastblock && !totalsiz), 3 + fb);
996 		flushbits(s);
997 		writebits(s, len, 16);
998 		writebits(s, len ^ 0xffff, 16);
999 		flushbits(s);
1000 		memcpy(s->out + s->pos_out, s->in + s->startpos, len);
1001 		s->pos_out += len;
1002 		s->startpos += len;
1003 	} while (totalsiz);
1004 }
1005 
kite_deflate_endblock(struct kite_deflate * s)1006 static void kite_deflate_endblock(struct kite_deflate *s)
1007 {
1008 	if (s->encode_mode == 1) {
1009 		u32 fixedcost = s->costbits;
1010 		unsigned int storelen, storeblocks, storecost;
1011 
1012 		kite_deflate_fixdynblock(s);
1013 		if (fixedcost > s->costbits)
1014 			s->encode_mode = 2;
1015 		else
1016 			s->costbits = fixedcost;
1017 
1018 		storelen = s->pos_in - s->prev_valid - s->startpos;
1019 		storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
1020 		storecost = (8 - s->bitpos) + storeblocks - 1 +
1021 			storeblocks * 32 + storelen * 8;
1022 		if (s->costbits > storecost) {
1023 			s->costbits = storecost;
1024 			s->encode_mode = 0;
1025 		}
1026 	}
1027 
1028 	s->lastblock |= (s->costbits + s->bitpos >=
1029 			(s->outlen - s->pos_out) * 8);
1030 }
1031 
kite_deflate_startblock(struct kite_deflate * s)1032 static void kite_deflate_startblock(struct kite_deflate *s)
1033 {
1034 	memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
1035 	memset(s->distFreqs, 0, sizeof(s->distFreqs));
1036 	memset(s->tables, 0, sizeof(s->tables[0]));
1037 	s->symbols = 0;
1038 	s->mainFreqs[kSymbolEndOfBlock]++;
1039 	s->encode_mode = 1;
1040 	s->tab = s->tables;
1041 	s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
1042 }
1043 
kite_deflate_commitblock(struct kite_deflate * s)1044 static bool kite_deflate_commitblock(struct kite_deflate *s)
1045 {
1046 	if (s->encode_mode == 1) {
1047 		kite_deflate_setfixedtrees(s);
1048 		kite_deflate_writeblock(s, true);
1049 	} else if (s->encode_mode == 2) {
1050 		kite_deflate_sendtrees(s);
1051 		kite_deflate_writeblock(s, false);
1052 	} else {
1053 		kite_deflate_writestore(s);
1054 	}
1055 	s->startpos = s->pos_in - s->prev_valid;
1056 	return s->lastblock;
1057 }
1058 
kite_deflate_fast(struct kite_deflate * s)1059 static bool kite_deflate_fast(struct kite_deflate *s)
1060 {
1061 	struct kite_matchfinder *mf = s->mf;
1062 
1063 	kite_deflate_startblock(s);
1064 	while (1) {
1065 		int matches = kite_mf_getmatches_hc3(mf, mf->depth,
1066 				kMatchMinLen - 1);
1067 
1068 		if (matches > 0) {
1069 			unsigned int len = mf->matches[matches].len;
1070 			unsigned int dist = mf->matches[matches].dist;
1071 
1072 			if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
1073 				goto nomatch;
1074 
1075 			kite_dbg("%u matches found: longest [%u,%u] of distance %u",
1076 				 matches, s->pos_in, s->pos_in + len - 1, dist);
1077 
1078 			if (kite_deflate_tally(s, mf->matches + matches))
1079 				break;
1080 			s->pos_in += len;
1081 			/* skip the rest bytes */
1082 			while (--len)
1083 				kite_mf_hc3_skip(mf);
1084 		} else {
1085 nomatch:
1086 			mf->matches[0].dist = s->in[s->pos_in];
1087 			if (isprint(s->in[s->pos_in]))
1088 				kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
1089 			else
1090 				kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
1091 
1092 			if (kite_deflate_tally(s, mf->matches))
1093 				break;
1094 			++s->pos_in;
1095 		}
1096 
1097 		s->lastblock |= (s->pos_in >= s->inlen);
1098 		if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
1099 			kite_deflate_endblock(s);
1100 			break;
1101 		}
1102 	}
1103 	return kite_deflate_commitblock(s);
1104 }
1105 
kite_deflate_slow(struct kite_deflate * s)1106 static bool kite_deflate_slow(struct kite_deflate *s)
1107 {
1108 	struct kite_matchfinder *mf = s->mf;
1109 	bool flush = false;
1110 
1111 	kite_deflate_startblock(s);
1112 	while (1) {
1113 		struct kite_match *prev_matches = mf->matches;
1114 		unsigned int len = kMatchMinLen - 1;
1115 		int matches;
1116 		unsigned int len0;
1117 
1118 		mf->matches = mf->matches_matrix[
1119 				mf->matches == mf->matches_matrix[0]];
1120 		mf->matches[0].dist = s->in[s->pos_in];
1121 
1122 		len0 = prev_matches[s->prev_longest].len;
1123 		if (len0 < mf->max_lazy) {
1124 			matches = kite_mf_getmatches_hc3(mf, mf->depth >>
1125 				(len0 >= mf->good_len), len0);
1126 			if (matches > 0) {
1127 				len = mf->matches[matches].len;
1128 				if (len == kMatchMinLen &&
1129 				    mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
1130 					matches = 0;
1131 					len = kMatchMinLen - 1;
1132 				}
1133 			} else {
1134 				matches = 0;
1135 			}
1136 		} else {
1137 			matches = 0;
1138 			kite_mf_hc3_skip(mf);
1139 		}
1140 
1141 		if (len < len0) {
1142 			if (kite_deflate_tally(s,
1143 					prev_matches + s->prev_longest))
1144 				break;
1145 
1146 			s->pos_in += --len0;
1147 			/* skip the rest bytes */
1148 			while (--len0)
1149 				kite_mf_hc3_skip(mf);
1150 			s->prev_valid = false;
1151 			s->prev_longest = 0;
1152 		} else {
1153 			if (!s->prev_valid)
1154 				s->prev_valid = true;
1155 			else if (kite_deflate_tally(s, prev_matches))
1156 				break;
1157 			++s->pos_in;
1158 			s->prev_longest = matches;
1159 		}
1160 
1161 		s->lastblock |= (s->pos_in >= s->inlen);
1162 		if (s->pos_in >= s->inlen) {
1163 			flush = true;
1164 			break;
1165 		}
1166 		if (s->symbols >= s->max_symbols) {
1167 			kite_deflate_endblock(s);
1168 			break;
1169 		}
1170 	}
1171 
1172 	if (flush && s->prev_valid) {
1173 		(void)kite_deflate_tally(s, mf->matches + s->prev_longest);
1174 		s->prev_valid = false;
1175 	}
1176 	return kite_deflate_commitblock(s);
1177 }
1178 
kite_deflate_end(struct kite_deflate * s)1179 void kite_deflate_end(struct kite_deflate *s)
1180 {
1181 	if (s->mf) {
1182 		if (s->mf->hash)
1183 			free(s->mf->hash);
1184 		if (s->mf->chain)
1185 			free(s->mf->chain);
1186 		free(s->mf);
1187 	}
1188 	if (s->sym)
1189 		free(s->sym);
1190 	free(s);
1191 }
1192 
kite_deflate_init(int level,unsigned int dict_size)1193 struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
1194 {
1195 	struct kite_deflate *s;
1196 	int err;
1197 
1198 	kite_deflate_init_once();
1199 	s = calloc(1, sizeof(*s));
1200 	if (!s)
1201 		return ERR_PTR(-ENOMEM);
1202 
1203 	s->max_symbols = 16384;
1204 	s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
1205 	if (!s->sym) {
1206 		err = -ENOMEM;
1207 		goto err_out;
1208 	}
1209 
1210 	s->mf = malloc(sizeof(*s->mf));
1211 	if (!s->mf) {
1212 		err = -ENOMEM;
1213 		goto err_out;
1214 	}
1215 
1216 	if (!dict_size)
1217 		dict_size = kHistorySize32;
1218 
1219 	err = kite_mf_init(s->mf, dict_size, level);
1220 	if (err < 0)
1221 		goto err_out;
1222 
1223 	s->lazy_search = err;
1224 	return s;
1225 err_out:
1226 	if (s->mf)
1227 		free(s->mf);
1228 	if (s->sym)
1229 		free(s->sym);
1230 	free(s);
1231 	return ERR_PTR(err);
1232 }
1233 
kite_deflate_destsize(struct kite_deflate * s,const u8 * in,u8 * out,unsigned int * srcsize,unsigned int target_dstsize)1234 int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
1235 			   unsigned int *srcsize, unsigned int target_dstsize)
1236 {
1237 	memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
1238 	s->in = in;
1239 	s->inlen = *srcsize;
1240 	s->out = out;
1241 	s->outlen = target_dstsize;
1242 	kite_mf_reset(s->mf, in, in + s->inlen);
1243 
1244 	if (s->lazy_search)
1245 		while (!kite_deflate_slow(s));
1246 	else
1247 		while (!kite_deflate_fast(s));
1248 	flushbits(s);
1249 
1250 	*srcsize = s->startpos;
1251 	return s->pos_out;
1252 }
1253 
1254 #if TEST
1255 #include <unistd.h>
1256 #include <fcntl.h>
1257 #include <sys/mman.h>
1258 
main(int argc,char * argv[])1259 int main(int argc, char *argv[])
1260 {
1261 	int fd;
1262 	u64 filelength;
1263 	u8 out[1048576], *buf;
1264 	int dstsize = 4096;
1265 	unsigned int srcsize, outsize;
1266 	struct kite_deflate *s;
1267 
1268 	fd = open(argv[1], O_RDONLY);
1269 	if (fd < 0)
1270 		return -errno;
1271 	if (argc > 2)
1272 		dstsize = atoi(argv[2]);
1273 	filelength = lseek(fd, 0, SEEK_END);
1274 
1275 	s = kite_deflate_init(9, 0);
1276 	if (IS_ERR(s))
1277 		return PTR_ERR(s);
1278 
1279 	filelength = lseek(fd, 0, SEEK_END);
1280 	buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
1281 	if (buf == MAP_FAILED)
1282 		return -errno;
1283 	close(fd);
1284 
1285 	srcsize = filelength;
1286 	outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
1287 	fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1288 	write(fd, out, outsize);
1289 	close(fd);
1290 	kite_deflate_end(s);
1291 	return 0;
1292 }
1293 #endif
1294