1 /* Copyright 2015 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* Function for fast encoding of an input fragment, independently from the input
8 history. This function uses one-pass processing: when we find a backward
9 match, we immediately emit the corresponding command and literal codes to
10 the bit stream.
11
12 Adapted from the CompressFragment() function in
13 https://github.com/google/snappy/blob/master/snappy.cc */
14
15 #include "compress_fragment.h"
16
17 #include <string.h> /* memcmp, memcpy, memset */
18
19 #include "../common/platform.h"
20 #include <brotli/types.h>
21 #include "brotli_bit_stream.h"
22 #include "entropy_encode.h"
23 #include "fast_log.h"
24 #include "find_match_length.h"
25 #include "write_bits.h"
26
27 #if defined(__cplusplus) || defined(c_plusplus)
28 extern "C" {
29 #endif
30
31 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
32
33 /* kHashMul32 multiplier has these properties:
34 * The multiplier must be odd. Otherwise we may lose the highest bit.
35 * No long streaks of ones or zeros.
36 * There is no effort to ensure that it is a prime, the oddity is enough
37 for this use.
38 * The number has been tuned heuristically against compression benchmarks. */
39 static const uint32_t kHashMul32 = 0x1E35A7BD;
40
Hash(const uint8_t * p,size_t shift)41 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
42 const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
43 return (uint32_t)(h >> shift);
44 }
45
HashBytesAtOffset(uint64_t v,int offset,size_t shift)46 static BROTLI_INLINE uint32_t HashBytesAtOffset(
47 uint64_t v, int offset, size_t shift) {
48 BROTLI_DCHECK(offset >= 0);
49 BROTLI_DCHECK(offset <= 3);
50 {
51 const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
52 return (uint32_t)(h >> shift);
53 }
54 }
55
IsMatch(const uint8_t * p1,const uint8_t * p2)56 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
57 return TO_BROTLI_BOOL(
58 BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
59 p1[4] == p2[4]);
60 }
61
62 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
63 of the "input" string and stores it into the bit stream.
64 Note that the prefix code here is built from the pre-LZ77 input, therefore
65 we can only approximate the statistics of the actual literal stream.
66 Moreover, for long inputs we build a histogram from a sample of the input
67 and thus have to assign a non-zero depth for each literal.
68 Returns estimated compression ratio millibytes/char for encoding given input
69 with generated code. */
BuildAndStoreLiteralPrefixCode(BrotliOnePassArena * s,const uint8_t * input,const size_t input_size,uint8_t depths[256],uint16_t bits[256],size_t * storage_ix,uint8_t * storage)70 static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
71 const uint8_t* input,
72 const size_t input_size,
73 uint8_t depths[256],
74 uint16_t bits[256],
75 size_t* storage_ix,
76 uint8_t* storage) {
77 uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
78 size_t histogram_total;
79 size_t i;
80 memset(histogram, 0, sizeof(s->histogram));
81
82 if (input_size < (1 << 15)) {
83 for (i = 0; i < input_size; ++i) {
84 ++histogram[input[i]];
85 }
86 histogram_total = input_size;
87 for (i = 0; i < 256; ++i) {
88 /* We weigh the first 11 samples with weight 3 to account for the
89 balancing effect of the LZ77 phase on the histogram. */
90 const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
91 histogram[i] += adjust;
92 histogram_total += adjust;
93 }
94 } else {
95 static const size_t kSampleRate = 29;
96 for (i = 0; i < input_size; i += kSampleRate) {
97 ++histogram[input[i]];
98 }
99 histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
100 for (i = 0; i < 256; ++i) {
101 /* We add 1 to each population count to avoid 0 bit depths (since this is
102 only a sample and we don't know if the symbol appears or not), and we
103 weigh the first 11 samples with weight 3 to account for the balancing
104 effect of the LZ77 phase on the histogram (more frequent symbols are
105 more likely to be in backward references instead as literals). */
106 const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
107 histogram[i] += adjust;
108 histogram_total += adjust;
109 }
110 }
111 BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
112 /* max_bits = */ 8,
113 depths, bits, storage_ix, storage);
114 {
115 size_t literal_ratio = 0;
116 for (i = 0; i < 256; ++i) {
117 if (histogram[i]) literal_ratio += histogram[i] * depths[i];
118 }
119 /* Estimated encoding ratio, millibytes per symbol. */
120 return (literal_ratio * 125) / histogram_total;
121 }
122 }
123
124 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
125 "bits" based on "histogram" and stores it into the bit stream. */
BuildAndStoreCommandPrefixCode(BrotliOnePassArena * s,size_t * storage_ix,uint8_t * storage)126 static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
127 size_t* storage_ix, uint8_t* storage) {
128 const uint32_t* const histogram = s->cmd_histo;
129 uint8_t* const depth = s->cmd_depth;
130 uint16_t* const bits = s->cmd_bits;
131 uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
132 uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
133 /* TODO(eustas): do only once on initialization. */
134 memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);
135
136 BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
137 BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
138 /* We have to jump through a few hoops here in order to compute
139 the command bits because the symbols are in a different order than in
140 the full alphabet. This looks complicated, but having the symbols
141 in this order in the command bits saves a few branches in the Emit*
142 functions. */
143 memcpy(tmp_depth, depth, 24);
144 memcpy(tmp_depth + 24, depth + 40, 8);
145 memcpy(tmp_depth + 32, depth + 24, 8);
146 memcpy(tmp_depth + 40, depth + 48, 8);
147 memcpy(tmp_depth + 48, depth + 32, 8);
148 memcpy(tmp_depth + 56, depth + 56, 8);
149 BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
150 memcpy(bits, tmp_bits, 48);
151 memcpy(bits + 24, tmp_bits + 32, 16);
152 memcpy(bits + 32, tmp_bits + 48, 16);
153 memcpy(bits + 40, tmp_bits + 24, 16);
154 memcpy(bits + 48, tmp_bits + 40, 16);
155 memcpy(bits + 56, tmp_bits + 56, 16);
156 BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
157 {
158 /* Create the bit length array for the full command alphabet. */
159 size_t i;
160 memset(tmp_depth, 0, 64); /* only 64 first values were used */
161 memcpy(tmp_depth, depth, 8);
162 memcpy(tmp_depth + 64, depth + 8, 8);
163 memcpy(tmp_depth + 128, depth + 16, 8);
164 memcpy(tmp_depth + 192, depth + 24, 8);
165 memcpy(tmp_depth + 384, depth + 32, 8);
166 for (i = 0; i < 8; ++i) {
167 tmp_depth[128 + 8 * i] = depth[40 + i];
168 tmp_depth[256 + 8 * i] = depth[48 + i];
169 tmp_depth[448 + 8 * i] = depth[56 + i];
170 }
171 /* TODO(eustas): could/should full-length machinery be avoided? */
172 BrotliStoreHuffmanTree(
173 tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
174 }
175 BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
176 }
177
178 /* REQUIRES: insertlen < 6210 */
EmitInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)179 static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
180 const uint8_t depth[128],
181 const uint16_t bits[128],
182 uint32_t histo[128],
183 size_t* storage_ix,
184 uint8_t* storage) {
185 if (insertlen < 6) {
186 const size_t code = insertlen + 40;
187 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
188 ++histo[code];
189 } else if (insertlen < 130) {
190 const size_t tail = insertlen - 2;
191 const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
192 const size_t prefix = tail >> nbits;
193 const size_t inscode = (nbits << 1) + prefix + 42;
194 BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
195 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
196 ++histo[inscode];
197 } else if (insertlen < 2114) {
198 const size_t tail = insertlen - 66;
199 const uint32_t nbits = Log2FloorNonZero(tail);
200 const size_t code = nbits + 50;
201 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
202 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
203 ++histo[code];
204 } else {
205 BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
206 BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
207 ++histo[61];
208 }
209 }
210
EmitLongInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)211 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
212 const uint8_t depth[128],
213 const uint16_t bits[128],
214 uint32_t histo[128],
215 size_t* storage_ix,
216 uint8_t* storage) {
217 if (insertlen < 22594) {
218 BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
219 BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
220 ++histo[62];
221 } else {
222 BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
223 BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
224 ++histo[63];
225 }
226 }
227
EmitCopyLen(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)228 static BROTLI_INLINE void EmitCopyLen(size_t copylen,
229 const uint8_t depth[128],
230 const uint16_t bits[128],
231 uint32_t histo[128],
232 size_t* storage_ix,
233 uint8_t* storage) {
234 if (copylen < 10) {
235 BrotliWriteBits(
236 depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
237 ++histo[copylen + 14];
238 } else if (copylen < 134) {
239 const size_t tail = copylen - 6;
240 const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
241 const size_t prefix = tail >> nbits;
242 const size_t code = (nbits << 1) + prefix + 20;
243 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
244 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
245 ++histo[code];
246 } else if (copylen < 2118) {
247 const size_t tail = copylen - 70;
248 const uint32_t nbits = Log2FloorNonZero(tail);
249 const size_t code = nbits + 28;
250 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
251 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
252 ++histo[code];
253 } else {
254 BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
255 BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
256 ++histo[39];
257 }
258 }
259
EmitCopyLenLastDistance(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)260 static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
261 const uint8_t depth[128],
262 const uint16_t bits[128],
263 uint32_t histo[128],
264 size_t* storage_ix,
265 uint8_t* storage) {
266 if (copylen < 12) {
267 BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
268 ++histo[copylen - 4];
269 } else if (copylen < 72) {
270 const size_t tail = copylen - 8;
271 const uint32_t nbits = Log2FloorNonZero(tail) - 1;
272 const size_t prefix = tail >> nbits;
273 const size_t code = (nbits << 1) + prefix + 4;
274 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
275 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
276 ++histo[code];
277 } else if (copylen < 136) {
278 const size_t tail = copylen - 8;
279 const size_t code = (tail >> 5) + 30;
280 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
281 BrotliWriteBits(5, tail & 31, storage_ix, storage);
282 BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
283 ++histo[code];
284 ++histo[64];
285 } else if (copylen < 2120) {
286 const size_t tail = copylen - 72;
287 const uint32_t nbits = Log2FloorNonZero(tail);
288 const size_t code = nbits + 28;
289 BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
290 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
291 BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
292 ++histo[code];
293 ++histo[64];
294 } else {
295 BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
296 BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
297 BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
298 ++histo[39];
299 ++histo[64];
300 }
301 }
302
EmitDistance(size_t distance,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)303 static BROTLI_INLINE void EmitDistance(size_t distance,
304 const uint8_t depth[128],
305 const uint16_t bits[128],
306 uint32_t histo[128],
307 size_t* storage_ix, uint8_t* storage) {
308 const size_t d = distance + 3;
309 const uint32_t nbits = Log2FloorNonZero(d) - 1u;
310 const size_t prefix = (d >> nbits) & 1;
311 const size_t offset = (2 + prefix) << nbits;
312 const size_t distcode = 2 * (nbits - 1) + prefix + 80;
313 BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
314 BrotliWriteBits(nbits, d - offset, storage_ix, storage);
315 ++histo[distcode];
316 }
317
EmitLiterals(const uint8_t * input,const size_t len,const uint8_t depth[256],const uint16_t bits[256],size_t * storage_ix,uint8_t * storage)318 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
319 const uint8_t depth[256],
320 const uint16_t bits[256],
321 size_t* storage_ix, uint8_t* storage) {
322 size_t j;
323 for (j = 0; j < len; j++) {
324 const uint8_t lit = input[j];
325 BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
326 }
327 }
328
329 /* REQUIRES: len <= 1 << 24. */
BrotliStoreMetaBlockHeader(size_t len,BROTLI_BOOL is_uncompressed,size_t * storage_ix,uint8_t * storage)330 static void BrotliStoreMetaBlockHeader(
331 size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
332 uint8_t* storage) {
333 size_t nibbles = 6;
334 /* ISLAST */
335 BrotliWriteBits(1, 0, storage_ix, storage);
336 if (len <= (1U << 16)) {
337 nibbles = 4;
338 } else if (len <= (1U << 20)) {
339 nibbles = 5;
340 }
341 BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
342 BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
343 /* ISUNCOMPRESSED */
344 BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
345 }
346
UpdateBits(size_t n_bits,uint32_t bits,size_t pos,uint8_t * array)347 static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
348 uint8_t* array) {
349 while (n_bits > 0) {
350 size_t byte_pos = pos >> 3;
351 size_t n_unchanged_bits = pos & 7;
352 size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
353 size_t total_bits = n_unchanged_bits + n_changed_bits;
354 uint32_t mask =
355 (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
356 uint32_t unchanged_bits = array[byte_pos] & mask;
357 uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
358 array[byte_pos] =
359 (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
360 n_bits -= n_changed_bits;
361 bits >>= n_changed_bits;
362 pos += n_changed_bits;
363 }
364 }
365
RewindBitPosition(const size_t new_storage_ix,size_t * storage_ix,uint8_t * storage)366 static void RewindBitPosition(const size_t new_storage_ix,
367 size_t* storage_ix, uint8_t* storage) {
368 const size_t bitpos = new_storage_ix & 7;
369 const size_t mask = (1u << bitpos) - 1;
370 storage[new_storage_ix >> 3] &= (uint8_t)mask;
371 *storage_ix = new_storage_ix;
372 }
373
ShouldMergeBlock(BrotliOnePassArena * s,const uint8_t * data,size_t len,const uint8_t * depths)374 static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
375 const uint8_t* data, size_t len, const uint8_t* depths) {
376 uint32_t* BROTLI_RESTRICT const histo = s->histogram;
377 static const size_t kSampleRate = 43;
378 size_t i;
379 memset(histo, 0, sizeof(s->histogram));
380 for (i = 0; i < len; i += kSampleRate) {
381 ++histo[data[i]];
382 }
383 {
384 const size_t total = (len + kSampleRate - 1) / kSampleRate;
385 double r = (FastLog2(total) + 0.5) * (double)total + 200;
386 for (i = 0; i < 256; ++i) {
387 r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
388 }
389 return TO_BROTLI_BOOL(r >= 0.0);
390 }
391 }
392
393 /* Acceptable loss for uncompressible speedup is 2% */
394 #define MIN_RATIO 980
395
ShouldUseUncompressedMode(const uint8_t * metablock_start,const uint8_t * next_emit,const size_t insertlen,const size_t literal_ratio)396 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
397 const uint8_t* metablock_start, const uint8_t* next_emit,
398 const size_t insertlen, const size_t literal_ratio) {
399 const size_t compressed = (size_t)(next_emit - metablock_start);
400 if (compressed * 50 > insertlen) {
401 return BROTLI_FALSE;
402 } else {
403 return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
404 }
405 }
406
EmitUncompressedMetaBlock(const uint8_t * begin,const uint8_t * end,const size_t storage_ix_start,size_t * storage_ix,uint8_t * storage)407 static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
408 const size_t storage_ix_start,
409 size_t* storage_ix, uint8_t* storage) {
410 const size_t len = (size_t)(end - begin);
411 RewindBitPosition(storage_ix_start, storage_ix, storage);
412 BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
413 *storage_ix = (*storage_ix + 7u) & ~7u;
414 memcpy(&storage[*storage_ix >> 3], begin, len);
415 *storage_ix += len << 3;
416 storage[*storage_ix >> 3] = 0;
417 }
418
419 static uint32_t kCmdHistoSeed[128] = {
420 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
421 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
422 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
423 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
424 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
425 1, 1, 1, 1, 0, 0, 0, 0,
426 };
427
BrotliCompressFragmentFastImpl(BrotliOnePassArena * s,const uint8_t * input,size_t input_size,BROTLI_BOOL is_last,int * table,size_t table_bits,size_t * storage_ix,uint8_t * storage)428 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
429 BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
430 BROTLI_BOOL is_last, int* table, size_t table_bits,
431 size_t* storage_ix, uint8_t* storage) {
432 uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
433 uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
434 uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
435 uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
436 uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
437 const uint8_t* ip_end;
438
439 /* "next_emit" is a pointer to the first byte that is not covered by a
440 previous copy. Bytes between "next_emit" and the start of the next copy or
441 the end of the input will be emitted as literal bytes. */
442 const uint8_t* next_emit = input;
443 /* Save the start of the first block for position and distance computations.
444 */
445 const uint8_t* base_ip = input;
446
447 static const size_t kFirstBlockSize = 3 << 15;
448 static const size_t kMergeBlockSize = 1 << 16;
449
450 const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
451 const size_t kMinMatchLen = 5;
452
453 const uint8_t* metablock_start = input;
454 size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
455 size_t total_block_size = block_size;
456 /* Save the bit position of the MLEN field of the meta-block header, so that
457 we can update it later if we decide to extend this meta-block. */
458 size_t mlen_storage_ix = *storage_ix + 3;
459
460 size_t literal_ratio;
461
462 const uint8_t* ip;
463 int last_distance;
464
465 const size_t shift = 64u - table_bits;
466
467 BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
468 /* No block splits, no contexts. */
469 BrotliWriteBits(13, 0, storage_ix, storage);
470
471 literal_ratio = BuildAndStoreLiteralPrefixCode(
472 s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);
473
474 {
475 /* Store the pre-compressed command and distance prefix codes. */
476 size_t i;
477 for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
478 BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
479 }
480 }
481 BrotliWriteBits(s->cmd_code_numbits & 7,
482 s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);
483
484 emit_commands:
485 /* Initialize the command and distance histograms. We will gather
486 statistics of command and distance codes during the processing
487 of this block and use it to update the command and distance
488 prefix codes for the next block. */
489 memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
490
491 /* "ip" is the input pointer. */
492 ip = input;
493 last_distance = -1;
494 ip_end = input + block_size;
495
496 if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
497 /* For the last block, we need to keep a 16 bytes margin so that we can be
498 sure that all distances are at most window size - 16.
499 For all other blocks, we only need to keep a margin of 5 bytes so that
500 we don't go over the block size with a copy. */
501 const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
502 input_size - kInputMarginBytes);
503 const uint8_t* ip_limit = input + len_limit;
504
505 uint32_t next_hash;
506 for (next_hash = Hash(++ip, shift); ; ) {
507 /* Step 1: Scan forward in the input looking for a 5-byte-long match.
508 If we get close to exhausting the input then goto emit_remainder.
509
510 Heuristic match skipping: If 32 bytes are scanned with no matches
511 found, start looking only at every other byte. If 32 more bytes are
512 scanned, look at every third byte, etc.. When a match is found,
513 immediately go back to looking at every byte. This is a small loss
514 (~5% performance, ~0.1% density) for compressible data due to more
515 bookkeeping, but for non-compressible data (such as JPEG) it's a huge
516 win since the compressor quickly "realizes" the data is incompressible
517 and doesn't bother looking for matches everywhere.
518
519 The "skip" variable keeps track of how many bytes there are since the
520 last match; dividing it by 32 (i.e. right-shifting by five) gives the
521 number of bytes to move ahead for each iteration. */
522 uint32_t skip = 32;
523
524 const uint8_t* next_ip = ip;
525 const uint8_t* candidate;
526 BROTLI_DCHECK(next_emit < ip);
527 trawl:
528 do {
529 uint32_t hash = next_hash;
530 uint32_t bytes_between_hash_lookups = skip++ >> 5;
531 BROTLI_DCHECK(hash == Hash(next_ip, shift));
532 ip = next_ip;
533 next_ip = ip + bytes_between_hash_lookups;
534 if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
535 goto emit_remainder;
536 }
537 next_hash = Hash(next_ip, shift);
538 candidate = ip - last_distance;
539 if (IsMatch(ip, candidate)) {
540 if (BROTLI_PREDICT_TRUE(candidate < ip)) {
541 table[hash] = (int)(ip - base_ip);
542 break;
543 }
544 }
545 candidate = base_ip + table[hash];
546 BROTLI_DCHECK(candidate >= base_ip);
547 BROTLI_DCHECK(candidate < ip);
548
549 table[hash] = (int)(ip - base_ip);
550 } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
551
552 /* Check copy distance. If candidate is not feasible, continue search.
553 Checking is done outside of hot loop to reduce overhead. */
554 if (ip - candidate > MAX_DISTANCE) goto trawl;
555
556 /* Step 2: Emit the found match together with the literal bytes from
557 "next_emit" to the bit stream, and then see if we can find a next match
558 immediately afterwards. Repeat until we find no match for the input
559 without emitting some literal bytes. */
560
561 {
562 /* We have a 5-byte match at ip, and we need to emit bytes in
563 [next_emit, ip). */
564 const uint8_t* base = ip;
565 size_t matched = 5 + FindMatchLengthWithLimit(
566 candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
567 int distance = (int)(base - candidate); /* > 0 */
568 size_t insert = (size_t)(base - next_emit);
569 ip += matched;
570 BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
571 (int)(next_emit - base_ip), (unsigned long)insert, 2));
572 BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
573 if (BROTLI_PREDICT_TRUE(insert < 6210)) {
574 EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
575 storage_ix, storage);
576 } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
577 literal_ratio)) {
578 EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
579 storage_ix, storage);
580 input_size -= (size_t)(base - input);
581 input = base;
582 next_emit = input;
583 goto next_block;
584 } else {
585 EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
586 storage_ix, storage);
587 }
588 EmitLiterals(next_emit, insert, lit_depth, lit_bits,
589 storage_ix, storage);
590 if (distance == last_distance) {
591 BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
592 ++cmd_histo[64];
593 } else {
594 EmitDistance((size_t)distance, cmd_depth, cmd_bits,
595 cmd_histo, storage_ix, storage);
596 last_distance = distance;
597 }
598 EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
599 storage_ix, storage);
600 BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
601 "[CompressFragment] pos = %d insert = %d copy = %d\n"
602 "[CompressFragment] pos = %d distance = %d\n",
603 (int)(base - base_ip), (int)distance,
604 (int)(base - base_ip) + 2, 0, (int)matched - 2,
605 (int)(base - base_ip) + 2, (int)distance));
606
607 next_emit = ip;
608 if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
609 goto emit_remainder;
610 }
611 /* We could immediately start working at ip now, but to improve
612 compression we first update "table" with the hashes of some positions
613 within the last copy. */
614 {
615 uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
616 uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
617 uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
618 table[prev_hash] = (int)(ip - base_ip - 3);
619 prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
620 table[prev_hash] = (int)(ip - base_ip - 2);
621 prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
622 table[prev_hash] = (int)(ip - base_ip - 1);
623
624 candidate = base_ip + table[cur_hash];
625 table[cur_hash] = (int)(ip - base_ip);
626 }
627 }
628
629 while (IsMatch(ip, candidate)) {
630 /* We have a 5-byte match at ip, and no need to emit any literal bytes
631 prior to ip. */
632 const uint8_t* base = ip;
633 size_t matched = 5 + FindMatchLengthWithLimit(
634 candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
635 if (ip - candidate > MAX_DISTANCE) break;
636 ip += matched;
637 last_distance = (int)(base - candidate); /* > 0 */
638 BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
639 EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
640 storage_ix, storage);
641 EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
642 cmd_histo, storage_ix, storage);
643 BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
644 "[CompressFragment] pos = %d distance = %d\n",
645 (int)(base - base_ip), 0, (int)matched,
646 (int)(base - base_ip), (int)last_distance));
647
648 next_emit = ip;
649 if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
650 goto emit_remainder;
651 }
652 /* We could immediately start working at ip now, but to improve
653 compression we first update "table" with the hashes of some positions
654 within the last copy. */
655 {
656 uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
657 uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
658 uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
659 table[prev_hash] = (int)(ip - base_ip - 3);
660 prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
661 table[prev_hash] = (int)(ip - base_ip - 2);
662 prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
663 table[prev_hash] = (int)(ip - base_ip - 1);
664
665 candidate = base_ip + table[cur_hash];
666 table[cur_hash] = (int)(ip - base_ip);
667 }
668 }
669
670 next_hash = Hash(++ip, shift);
671 }
672 }
673
674 emit_remainder:
675 BROTLI_DCHECK(next_emit <= ip_end);
676 input += block_size;
677 input_size -= block_size;
678 block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
679
680 /* Decide if we want to continue this meta-block instead of emitting the
681 last insert-only command. */
682 if (input_size > 0 &&
683 total_block_size + block_size <= (1 << 20) &&
684 ShouldMergeBlock(s, input, block_size, lit_depth)) {
685 BROTLI_DCHECK(total_block_size > (1 << 16));
686 /* Update the size of the current meta-block and continue emitting commands.
687 We can do this because the current size and the new size both have 5
688 nibbles. */
689 total_block_size += block_size;
690 UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
691 goto emit_commands;
692 }
693
694 /* Emit the remaining bytes as literals. */
695 if (next_emit < ip_end) {
696 const size_t insert = (size_t)(ip_end - next_emit);
697 BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
698 (int)(next_emit - base_ip), (unsigned long)insert, 2));
699 if (BROTLI_PREDICT_TRUE(insert < 6210)) {
700 EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
701 storage_ix, storage);
702 EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
703 } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
704 literal_ratio)) {
705 EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
706 storage_ix, storage);
707 } else {
708 EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
709 storage_ix, storage);
710 EmitLiterals(next_emit, insert, lit_depth, lit_bits,
711 storage_ix, storage);
712 }
713 }
714 next_emit = ip_end;
715
716 next_block:
717 /* If we have more data, write a new meta-block header and prefix codes and
718 then continue emitting commands. */
719 if (input_size > 0) {
720 metablock_start = input;
721 block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
722 total_block_size = block_size;
723 /* Save the bit position of the MLEN field of the meta-block header, so that
724 we can update it later if we decide to extend this meta-block. */
725 mlen_storage_ix = *storage_ix + 3;
726 BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
727 /* No block splits, no contexts. */
728 BrotliWriteBits(13, 0, storage_ix, storage);
729 literal_ratio = BuildAndStoreLiteralPrefixCode(
730 s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
731 BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
732 goto emit_commands;
733 }
734
735 if (!is_last) {
736 /* If this is not the last block, update the command and distance prefix
737 codes for the next block and store the compressed forms. */
738 s->cmd_code[0] = 0;
739 s->cmd_code_numbits = 0;
740 BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
741 }
742 }
743
744 #define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
745
746 #define BAKE_METHOD_PARAM_(B) \
747 static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
748 BrotliOnePassArena* s, const uint8_t* input, size_t input_size, \
749 BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
750 BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B, \
751 storage_ix, storage); \
752 }
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)753 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
754 #undef BAKE_METHOD_PARAM_
755
756 void BrotliCompressFragmentFast(
757 BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
758 BROTLI_BOOL is_last, int* table, size_t table_size,
759 size_t* storage_ix, uint8_t* storage) {
760 const size_t initial_storage_ix = *storage_ix;
761 const size_t table_bits = Log2FloorNonZero(table_size);
762
763 if (input_size == 0) {
764 BROTLI_DCHECK(is_last);
765 BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
766 BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
767 *storage_ix = (*storage_ix + 7u) & ~7u;
768 return;
769 }
770
771 switch (table_bits) {
772 #define CASE_(B) \
773 case B: \
774 BrotliCompressFragmentFastImpl ## B( \
775 s, input, input_size, is_last, table, storage_ix, storage);\
776 break;
777 FOR_TABLE_BITS_(CASE_)
778 #undef CASE_
779 default: BROTLI_DCHECK(0); break;
780 }
781
782 /* If output is larger than single uncompressed block, rewrite it. */
783 if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
784 EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
785 storage_ix, storage);
786 }
787
788 if (is_last) {
789 BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
790 BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
791 *storage_ix = (*storage_ix + 7u) & ~7u;
792 }
793 }
794
795 #undef FOR_TABLE_BITS_
796
797 #if defined(__cplusplus) || defined(c_plusplus)
798 } /* extern "C" */
799 #endif
800