1 /* Copyright 2013 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* Implementation of Brotli compressor. */
8
9 #include <brotli/encode.h>
10
11 #include <stdlib.h> /* free, malloc */
12 #include <string.h> /* memcpy, memset */
13
14 #include "../common/constants.h"
15 #include "../common/context.h"
16 #include "../common/platform.h"
17 #include "../common/version.h"
18 #include "backward_references.h"
19 #include "backward_references_hq.h"
20 #include "bit_cost.h"
21 #include "brotli_bit_stream.h"
22 #include "compress_fragment.h"
23 #include "compress_fragment_two_pass.h"
24 #include "dictionary_hash.h"
25 #include "encoder_dict.h"
26 #include "entropy_encode.h"
27 #include "fast_log.h"
28 #include "hash.h"
29 #include "histogram.h"
30 #include "memory.h"
31 #include "metablock.h"
32 #include "prefix.h"
33 #include "quality.h"
34 #include "ringbuffer.h"
35 #include "utf8_util.h"
36 #include "write_bits.h"
37
38 #if defined(__cplusplus) || defined(c_plusplus)
39 extern "C" {
40 #endif
41
42 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
43
/* High-level streaming state of the encoder; controls what the streaming
   API is allowed / expected to do next. */
typedef enum BrotliEncoderStreamState {
  /* Default state. */
  BROTLI_STREAM_PROCESSING = 0,
  /* Intermediate state; after next block is emitted, byte-padding should be
     performed before getting back to default state. */
  BROTLI_STREAM_FLUSH_REQUESTED = 1,
  /* Last metablock was produced; no more input is acceptable. */
  BROTLI_STREAM_FINISHED = 2,
  /* Flushing compressed block and writing meta-data block header. */
  BROTLI_STREAM_METADATA_HEAD = 3,
  /* Writing metadata block body. */
  BROTLI_STREAM_METADATA_BODY = 4
} BrotliEncoderStreamState;
57
/* Progress of "flint" emission (see the |flint_| field of
   BrotliEncoderStateStruct): positive values (NEEDS_N_BYTES) come first,
   followed by WAITING_FOR_PROCESSING (0), WAITING_FOR_FLUSHING (-1) and
   finally DONE (-2). */
typedef enum BrotliEncoderFlintState {
  BROTLI_FLINT_NEEDS_2_BYTES = 2,
  BROTLI_FLINT_NEEDS_1_BYTE = 1,
  BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
  BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
  BROTLI_FLINT_DONE = -2
} BrotliEncoderFlintState;
65
/* Complete state of a streaming Brotli encoder instance. */
typedef struct BrotliEncoderStateStruct {
  BrotliEncoderParams params;

  MemoryManager memory_manager_;

  /* Total number of input bytes accepted so far. */
  uint64_t input_pos_;
  /* Sliding window over the (tail of the) input. */
  RingBuffer ringbuffer_;
  /* Allocated capacity (in commands) of |commands_|. */
  size_t cmd_alloc_size_;
  Command* commands_;
  size_t num_commands_;
  size_t num_literals_;
  size_t last_insert_len_;
  /* Input position of the last emitted metablock boundary. */
  uint64_t last_flush_pos_;
  /* Input position up to which backward references were computed. */
  uint64_t last_processed_pos_;
  int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
  /* Snapshot of the first 4 distance cache entries; restored when an
     uncompressed metablock is emitted instead of a compressed one. */
  int saved_dist_cache_[4];
  /* Pending (not yet byte-aligned) tail of the output bit stream. */
  uint16_t last_bytes_;
  uint8_t last_bytes_bits_;
  /* "Flint" is a tiny uncompressed block emitted before the continuation
     block to unwire literal context from previous data. Despite being int8_t,
     field is actually BrotliEncoderFlintState enum. */
  int8_t flint_;
  uint8_t prev_byte_;
  uint8_t prev_byte2_;
  /* Lazily grown scratch buffer for metablock serialization. */
  size_t storage_size_;
  uint8_t* storage_;

  Hasher hasher_;

  /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
  int small_table_[1 << 10];  /* 4KiB */
  int* large_table_;          /* Allocated only when needed */
  size_t large_table_size_;

  BrotliOnePassArena* one_pass_arena_;
  BrotliTwoPassArena* two_pass_arena_;

  /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
  uint32_t* command_buf_;
  uint8_t* literal_buf_;

  uint8_t* next_out_;
  size_t available_out_;
  size_t total_out_;
  /* Temporary buffer for padding flush bits or metadata block header / body. */
  union {
    uint64_t u64[2];
    uint8_t u8[16];
  } tiny_buf_;
  uint32_t remaining_metadata_bytes_;
  BrotliEncoderStreamState stream_state_;

  BROTLI_BOOL is_last_block_emitted_;
  BROTLI_BOOL is_initialized_;
} BrotliEncoderStateStruct;
121
/* Returns the metablock input budget: 2^lgblock bytes. */
static size_t InputBlockSize(BrotliEncoderState* s) {
  const int lgblock = s->params.lgblock;
  return (size_t)1 << lgblock;
}
125
/* Number of bytes already copied in, but not yet turned into commands. */
static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
  const uint64_t received = s->input_pos_;
  const uint64_t processed = s->last_processed_pos_;
  return received - processed;
}
129
/* How many more bytes fit into the current input block before it is full. */
static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
  const size_t block_size = InputBlockSize(s);
  const uint64_t pending = UnprocessedInputSize(s);
  return (pending >= block_size) ? 0 : block_size - (size_t)pending;
}
136
BrotliEncoderSetParameter(BrotliEncoderState * state,BrotliEncoderParameter p,uint32_t value)137 BROTLI_BOOL BrotliEncoderSetParameter(
138 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
139 /* Changing parameters on the fly is not implemented yet. */
140 if (state->is_initialized_) return BROTLI_FALSE;
141 /* TODO(eustas): Validate/clamp parameters here. */
142 switch (p) {
143 case BROTLI_PARAM_MODE:
144 state->params.mode = (BrotliEncoderMode)value;
145 return BROTLI_TRUE;
146
147 case BROTLI_PARAM_QUALITY:
148 state->params.quality = (int)value;
149 return BROTLI_TRUE;
150
151 case BROTLI_PARAM_LGWIN:
152 state->params.lgwin = (int)value;
153 return BROTLI_TRUE;
154
155 case BROTLI_PARAM_LGBLOCK:
156 state->params.lgblock = (int)value;
157 return BROTLI_TRUE;
158
159 case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
160 if ((value != 0) && (value != 1)) return BROTLI_FALSE;
161 state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
162 return BROTLI_TRUE;
163
164 case BROTLI_PARAM_SIZE_HINT:
165 state->params.size_hint = value;
166 return BROTLI_TRUE;
167
168 case BROTLI_PARAM_LARGE_WINDOW:
169 state->params.large_window = TO_BROTLI_BOOL(!!value);
170 return BROTLI_TRUE;
171
172 case BROTLI_PARAM_NPOSTFIX:
173 state->params.dist.distance_postfix_bits = value;
174 return BROTLI_TRUE;
175
176 case BROTLI_PARAM_NDIRECT:
177 state->params.dist.num_direct_distance_codes = value;
178 return BROTLI_TRUE;
179
180 case BROTLI_PARAM_STREAM_OFFSET:
181 if (value > (1u << 30)) return BROTLI_FALSE;
182 state->params.stream_offset = value;
183 return BROTLI_TRUE;
184
185 default: return BROTLI_FALSE;
186 }
187 }
188
/* Wraps 64-bit input position to 32-bit ring-buffer position preserving
   "not-a-first-lap" feature: the first 3 GiB map through unchanged, after
   that the position alternates between the [1GiB, 2GiB) and [2GiB, 3GiB)
   ranges every gigabyte. */
static uint32_t WrapPosition(uint64_t position) {
  uint32_t wrapped = (uint32_t)position;
  const uint64_t gb = position >> 30;
  if (gb > 2) {
    /* Wrap every 2GiB; The first 3GB are continuous. */
    const uint32_t high = (uint32_t)((gb - 1) & 1) + 1;
    wrapped = (wrapped & ((1u << 30) - 1)) | (high << 30);
  }
  return wrapped;
}
200
/* Returns the scratch serialization buffer, growing it to at least |size|
   bytes if needed. Previous contents are NOT preserved on growth.
   Returns NULL on allocation failure. */
static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
  MemoryManager* m = &s->memory_manager_;
  if (s->storage_size_ >= size) return s->storage_;
  BROTLI_FREE(m, s->storage_);
  s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
  s->storage_size_ = size;
  return s->storage_;
}
211
/* Smallest power of two >= 256 that is not below both bounds; i.e. doubling
   stops as soon as either |max_table_size| or |input_size| is reached. */
static size_t HashTableSize(size_t max_table_size, size_t input_size) {
  size_t htsize;
  for (htsize = 256; htsize < max_table_size && htsize < input_size;) {
    htsize <<= 1;
  }
  return htsize;
}
219
/* Returns a zeroed hash table with at least min(max-for-quality, input_size)
   buckets (rounded to a power of two); writes the chosen bucket count to
   |*table_size|. Small requests reuse the table embedded in |s|; larger ones
   use a lazily grown heap table. Returns 0 on allocation failure. */
static int* GetHashTable(BrotliEncoderState* s, int quality,
                         size_t input_size, size_t* table_size) {
  /* Use smaller hash table when input.size() is smaller, since we
     fill the table, incurring O(hash table size) overhead for
     compression, and if the input is short, we won't need that
     many hash table entries anyway. */
  MemoryManager* m = &s->memory_manager_;
  const size_t max_table_size = MaxHashTableSize(quality);
  size_t htsize = HashTableSize(max_table_size, input_size);
  int* table;
  BROTLI_DCHECK(max_table_size >= 256);
  /* Only odd shifts are supported by fast-one-pass; double the size when the
     current power of two would produce an even shift. */
  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY &&
      (htsize & 0xAAAAA) == 0) {
    htsize <<= 1;
  }

  if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
    table = s->small_table_;
  } else {
    if (htsize > s->large_table_size_) {
      /* Grow the heap table; old contents are irrelevant (zeroed below). */
      s->large_table_size_ = htsize;
      BROTLI_FREE(m, s->large_table_);
      s->large_table_ = BROTLI_ALLOC(m, int, htsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
    }
    table = s->large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
254
/* Produces the stream-header bits that announce the sliding-window size,
   storing them (LSB-first) in |*last_bytes| with the bit count in
   |*last_bytes_bits|. */
static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
    uint16_t* last_bytes, uint8_t* last_bytes_bits) {
  if (large_window) {
    /* 14-bit large-window header. */
    *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
    *last_bytes_bits = 14;
    return;
  }
  if (lgwin == 16) {
    *last_bytes = 0;
    *last_bytes_bits = 1;
  } else if (lgwin == 17) {
    *last_bytes = 1;
    *last_bytes_bits = 7;
  } else if (lgwin > 17) {
    *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
    *last_bytes_bits = 4;
  } else {
    /* lgwin in [10, 15]. */
    *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
    *last_bytes_bits = 7;
  }
}
276
/* TODO(eustas): move to compress_fragment.c? */
/* Initializes the command and distance prefix codes for the first block.
   The tables below are a fixed, pre-built Huffman code (depths + bit
   patterns) plus its serialized header form, so the first one-pass block
   can be emitted without computing a code from data. */
static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
  /* Code lengths (bit depths) for the 128 command/distance symbols. */
  static const uint8_t kDefaultCommandDepths[128] = {
    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
  };
  /* Bit patterns matching the depths above. */
  static const uint16_t kDefaultCommandBits[128] = {
    0, 0, 8, 9, 3, 35, 7, 71,
    39, 103, 23, 47, 175, 111, 239, 31,
    0, 0, 0, 4, 12, 2, 10, 6,
    13, 29, 11, 43, 27, 59, 87, 55,
    15, 79, 319, 831, 191, 703, 447, 959,
    0, 14, 1, 25, 5, 21, 19, 51,
    119, 159, 95, 223, 479, 991, 63, 575,
    127, 639, 383, 895, 255, 767, 511, 1023,
    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
  };
  /* Pre-serialized bit-stream form of the code (header bytes). */
  static const uint8_t kDefaultCommandCode[] = {
    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
  };
  static const size_t kDefaultCommandCodeNumBits = 448;
  COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
  COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);

  /* Initialize the pre-compressed form of the command and distance prefix
     codes. */
  COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
  s->cmd_code_numbits = kDefaultCommandCodeNumBits;
}
320
/* Decide about the context map based on the ability of the prediction
   ability of the previous byte UTF8-prefix on the next byte. The
   prediction ability is calculated as Shannon entropy. Here we need
   Shannon entropy instead of 'BitsEntropy' since the prefix will be
   encoded with the remaining 6 bits of the following byte, and
   BitsEntropy will assume that symbol to be stored alone using Huffman
   coding.
   |bigram_histo| holds 9 counts: a 3x3 histogram of (previous class,
   current class) pairs over 3 UTF8-prefix classes. Outputs the number of
   literal contexts to use and (for 2 or 3 contexts) a static context map. */
static void ChooseContextMap(int quality,
    uint32_t* bigram_histo,
    size_t* num_literal_contexts,
    const uint32_t** literal_context_map) {
  /* Map for 3 contexts. */
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  /* Map for 2 contexts. */
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total;
  size_t i;
  size_t dummy;
  /* entropy[k] (k=1..3): per-symbol entropy when using k contexts;
     entropy[0] is reused as the 1/total normalization factor. */
  double entropy[4];
  /* Fold the 3x3 bigram histogram into its marginals. */
  for (i = 0; i < 9; ++i) {
    monogram_histo[i % 3] += bigram_histo[i];
    two_prefix_histo[i % 6] += bigram_histo[i];
  }
  entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
  entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
                ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
  entropy[3] = 0;
  for (i = 0; i < 3; ++i) {
    entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
  }

  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
  BROTLI_DCHECK(total != 0);
  entropy[0] = 1.0 / (double)total;
  entropy[1] *= entropy[0];
  entropy[2] *= entropy[0];
  entropy[3] *= entropy[0];

  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
    /* 3 context models is a bit slower, don't use it at lower qualities. */
    entropy[3] = entropy[1] * 10;
  }
  /* If expected savings by symbol are less than 0.2 bits, skip the
     context modeling -- in exchange for faster decoding speed. */
  if (entropy[1] - entropy[2] < 0.2 &&
      entropy[1] - entropy[3] < 0.2) {
    *num_literal_contexts = 1;
  } else if (entropy[2] - entropy[3] < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
  } else {
    *num_literal_contexts = 3;
    *literal_context_map = kStaticContextMapContinuation;
  }
}
387
/* Decide if we want to use a more complex static context map containing 13
   context values, based on the entropy reduction of histograms over the
   first 5 bits of literals.
   |arena| is caller-provided scratch space and must hold at least
   14 * 32 uint32_t values (1 contextless histogram + 13 context
   histograms of 32 buckets each). Returns BROTLI_TRUE and sets the two
   output parameters iff the complex map is chosen. */
static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  static const uint32_t kStaticContextMapComplexUTF8[64] = {
    11, 11, 12, 12, /* 0 special */
    0, 0, 0, 0, /* 4 lf */
    1, 1, 9, 9, /* 8 space */
    2, 2, 2, 2, /* !, first after space/lf and after something else. */
    1, 1, 1, 1, /* " */
    8, 3, 3, 3, /* % */
    1, 1, 1, 1, /* ({[ */
    2, 2, 2, 2, /* }]) */
    8, 4, 4, 4, /* :; */
    8, 7, 4, 4, /* . */
    8, 0, 0, 0, /* > */
    3, 3, 3, 3, /* [0..9] */
    5, 5, 10, 5, /* [A-Z] */
    5, 5, 10, 5,
    6, 6, 6, 6, /* [a-z] */
    6, 6, 6, 6,
  };
  BROTLI_UNUSED(quality);
  /* Try the more complex static context map only for long data. */
  if (size_hint < (1 << 20)) {
    return BROTLI_FALSE;
  } else {
    const size_t end_pos = start_pos + length;
    /* To make entropy calculations faster, we collect histograms
       over the 5 most significant bits of literals. One histogram
       without context and 13 additional histograms for each context value. */
    uint32_t* BROTLI_RESTRICT const combined_histo = arena;
    uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
    uint32_t total = 0;
    /* entropy[1]: contextless per-symbol entropy; entropy[2]: per-symbol
       entropy with the 13-value context map; entropy[0]: 1/total scale. */
    double entropy[3];
    size_t dummy;
    size_t i;
    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
    memset(arena, 0, sizeof(arena[0]) * 32 * 14);
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      const size_t stride_end_pos = start_pos + 64;
      uint8_t prev2 = input[start_pos & mask];
      uint8_t prev1 = input[(start_pos + 1) & mask];
      size_t pos;
      /* To make the analysis of the data faster we only examine 64 byte long
         strides at every 4kB intervals. */
      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
        ++total;
        ++combined_histo[literal >> 3];
        ++context_histo[(context << 5) + (literal >> 3)];
        prev2 = prev1;
        prev1 = literal;
      }
    }
    entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
    entropy[2] = 0;
    for (i = 0; i < 13; ++i) {
      entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &dummy);
    }
    entropy[0] = 1.0 / (double)total;
    entropy[1] *= entropy[0];
    entropy[2] *= entropy[0];
    /* The triggering heuristics below were tuned by compressing the individual
       files of the silesia corpus. If we skip this kind of context modeling
       for not very well compressible input (i.e. entropy using context modeling
       is 60% of maximal entropy) or if expected savings by symbol are less
       than 0.2 bits, then in every case when it triggers, the final compression
       ratio is improved. Note however that this heuristics might be too strict
       for some cases and could be tuned further. */
    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
      return BROTLI_FALSE;
    } else {
      *num_literal_contexts = 13;
      *literal_context_map = kStaticContextMapComplexUTF8;
      return BROTLI_TRUE;
    }
  }
}
472
/* Chooses the literal context map for the metablock: tries the complex
   13-value static map first; otherwise samples UTF8-prefix bigrams and lets
   ChooseContextMap pick 1, 2 or 3 contexts. Leaves the outputs untouched
   (caller's defaults) for low qualities or very short input.
   |arena| must be at least as large as ShouldUseComplexStaticContextMap
   requires (14 * 32 uint32_t values). */
static void DecideOverLiteralContextModeling(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map,
    uint32_t* arena) {
  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
    return;
  } else if (ShouldUseComplexStaticContextMap(
      input, start_pos, length, mask, quality, size_hint,
      num_literal_contexts, literal_context_map, arena)) {
    /* Context map was already set, nothing else to do. */
  } else {
    /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
       UTF8 data faster we only examine 64 byte long strides at every 4kB
       intervals. */
    const size_t end_pos = start_pos + length;
    uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
    memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      /* Classify bytes into 3 groups by their top 2 bits. */
      static const int lut[4] = { 0, 0, 1, 2 };
      const size_t stride_end_pos = start_pos + 64;
      int prev = lut[input[start_pos & mask] >> 6] * 3;
      size_t pos;
      for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        ++bigram_prefix_histo[prev + lut[literal >> 6]];
        prev = lut[literal >> 6] * 3;
      }
    }
    ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                     literal_context_map);
  }
}
505
ShouldCompress(const uint8_t * data,const size_t mask,const uint64_t last_flush_pos,const size_t bytes,const size_t num_literals,const size_t num_commands)506 static BROTLI_BOOL ShouldCompress(
507 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
508 const size_t bytes, const size_t num_literals, const size_t num_commands) {
509 /* TODO(eustas): find more precise minimal block overhead. */
510 if (bytes <= 2) return BROTLI_FALSE;
511 if (num_commands < (bytes >> 8) + 2) {
512 if ((double)num_literals > 0.99 * (double)bytes) {
513 uint32_t literal_histo[256] = { 0 };
514 static const uint32_t kSampleRate = 13;
515 static const double kMinEntropy = 7.92;
516 const double bit_cost_threshold =
517 (double)bytes * kMinEntropy / kSampleRate;
518 size_t t = (bytes + kSampleRate - 1) / kSampleRate;
519 uint32_t pos = (uint32_t)last_flush_pos;
520 size_t i;
521 for (i = 0; i < t; i++) {
522 ++literal_histo[data[pos & mask]];
523 pos += kSampleRate;
524 }
525 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
526 return BROTLI_FALSE;
527 }
528 }
529 }
530 return BROTLI_TRUE;
531 }
532
533 /* Chooses the literal context mode for a metablock */
ChooseContextMode(const BrotliEncoderParams * params,const uint8_t * data,const size_t pos,const size_t mask,const size_t length)534 static ContextType ChooseContextMode(const BrotliEncoderParams* params,
535 const uint8_t* data, const size_t pos, const size_t mask,
536 const size_t length) {
537 /* We only do the computation for the option of something else than
538 CONTEXT_UTF8 for the highest qualities */
539 if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
540 !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
541 return CONTEXT_SIGNED;
542 }
543 return CONTEXT_UTF8;
544 }
545
/* Serializes one metablock covering |bytes| input bytes that end at
   |last_flush_pos|, appending bits to |storage| at bit offset |*storage_ix|.
   Picks the cheapest of: empty last block, uncompressed block, or one of
   three compressed encodings depending on quality. If the uncompressed
   fallback is taken, the distance cache is restored from |saved_dist_cache|
   because the commands that updated it are discarded. On OOM (signaled via
   |m|) the function returns early with partial output. */
static void WriteMetaBlockInternal(MemoryManager* m,
    const uint8_t* data,
    const size_t mask,
    const uint64_t last_flush_pos,
    const size_t bytes,
    const BROTLI_BOOL is_last,
    ContextType literal_context_mode,
    const BrotliEncoderParams* params,
    const uint8_t prev_byte,
    const uint8_t prev_byte2,
    const size_t num_literals,
    const size_t num_commands,
    Command* commands,
    const int* saved_dist_cache,
    int* dist_cache,
    size_t* storage_ix,
    uint8_t* storage) {
  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
  uint16_t last_bytes;
  uint8_t last_bytes_bits;
  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
  BrotliEncoderParams block_params = *params;

  if (bytes == 0) {
    /* Write the ISLAST and ISEMPTY bits. */
    BrotliWriteBits(2, 3, storage_ix, storage);
    /* Pad to a byte boundary. */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
                      num_literals, num_commands)) {
    /* Restore the distance cache, as its last update by
       CreateBackwardReferences is now unused. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask, bytes,
                                     storage_ix, storage);
    return;
  }

  /* Remember the pending output bits so they can be rolled back if the
     compressed form turns out larger than the uncompressed one. */
  BROTLI_DCHECK(*storage_ix <= 14);
  last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
  last_bytes_bits = (uint8_t)(*storage_ix);
  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    /* Lowest qualities: static entropy codes, no block splitting. */
    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
                             bytes, mask, is_last, params,
                             commands, num_commands,
                             storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
    /* Mid qualities: single block, data-derived entropy codes. */
    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
                                bytes, mask, is_last, params,
                                commands, num_commands,
                                storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else {
    /* High qualities: full metablock split with context modeling. */
    MetaBlockSplit mb;
    InitMetaBlockSplit(&mb);
    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      size_t num_literal_contexts = 1;
      const uint32_t* literal_context_map = NULL;
      if (!params->disable_literal_context_modeling) {
        /* TODO(eustas): pull to higher level and reuse. */
        uint32_t* arena = BROTLI_ALLOC(m, uint32_t, 14 * 32);
        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
        DecideOverLiteralContextModeling(
            data, wrapped_last_flush_pos, bytes, mask, params->quality,
            params->size_hint, &num_literal_contexts,
            &literal_context_map, arena);
        BROTLI_FREE(m, arena);
      }
      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
          prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
          literal_context_map, commands, num_commands, &mb);
      if (BROTLI_IS_OOM(m)) return;
    } else {
      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) return;
    }
    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
      /* The number of distance symbols effectively used for distance
         histograms. It might be less than distance alphabet size
         for "Large Window Brotli" (32-bit). */
      BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
    }
    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
                         prev_byte, prev_byte2,
                         is_last,
                         &block_params,
                         literal_context_mode,
                         commands, num_commands,
                         &mb,
                         storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    DestroyMetaBlockSplit(m, &mb);
  }
  if (bytes + 4 < (*storage_ix >> 3)) {
    /* Compressed form came out larger than raw + overhead:
       restore the distance cache and last byte. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    storage[0] = (uint8_t)last_bytes;
    storage[1] = (uint8_t)(last_bytes >> 8);
    *storage_ix = last_bytes_bits;
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask,
                                     bytes, storage_ix, storage);
  }
}
658
/* Derives the distance-code parameters (postfix bits / direct codes) for the
   chosen quality and mode, validating any user-supplied values and falling
   back to (0, 0) when they are out of range or inconsistent. */
static void ChooseDistanceParams(BrotliEncoderParams* params) {
  uint32_t npostfix = 0;
  uint32_t ndirect = 0;

  if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
    uint32_t ndirect_msb;
    if (params->mode == BROTLI_MODE_FONT) {
      /* Font mode has fixed, tuned distance parameters. */
      npostfix = 1;
      ndirect = 12;
    } else {
      npostfix = params->dist.distance_postfix_bits;
      ndirect = params->dist.num_direct_distance_codes;
    }
    /* NDIRECT must be expressible as a 4-bit value shifted by NPOSTFIX. */
    ndirect_msb = (ndirect >> npostfix) & 0x0F;
    if (npostfix > BROTLI_MAX_NPOSTFIX ||
        ndirect > BROTLI_MAX_NDIRECT ||
        (ndirect_msb << npostfix) != ndirect) {
      npostfix = 0;
      ndirect = 0;
    }
  }

  BrotliInitDistanceParams(&params->dist, npostfix, ndirect,
                           params->large_window);
}
684
/* One-time lazy initialization performed on first use of the encoder:
   sanitizes parameters, sets up the ring buffer, encodes the stream header
   bits and allocates quality-specific arenas. Idempotent; returns
   BROTLI_FALSE only on OOM. */
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  if (s->is_initialized_) return BROTLI_TRUE;

  s->last_bytes_bits_ = 0;
  s->last_bytes_ = 0;
  s->flint_ = BROTLI_FLINT_DONE;
  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;

  SanitizeParams(&s->params);
  s->params.lgblock = ComputeLgBlock(&s->params);
  ChooseDistanceParams(&s->params);

  if (s->params.stream_offset != 0) {
    /* Mid-stream continuation: schedule flint emission and invalidate the
       distance cache, which refers to data this instance never saw. */
    s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
    /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
    s->dist_cache_[0] = -16;
    s->dist_cache_[1] = -16;
    s->dist_cache_[2] = -16;
    s->dist_cache_[3] = -16;
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
  }

  RingBufferSetup(&s->params, &s->ringbuffer_);

  /* Initialize last byte with stream header. */
  {
    int lgwin = s->params.lgwin;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
        s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
      lgwin = BROTLI_MAX(int, lgwin, 18);
    }
    if (s->params.stream_offset == 0) {
      EncodeWindowBits(lgwin, s->params.large_window,
                       &s->last_bytes_, &s->last_bytes_bits_);
    } else {
      /* Continuation streams do not emit a header of their own. */
      /* Bigger values have the same effect, but could cause overflows. */
      s->params.stream_offset = BROTLI_MIN(size_t,
          s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
    }
  }

  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    InitCommandPrefixCodes(s->one_pass_arena_);
  } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  }

  s->is_initialized_ = BROTLI_TRUE;
  return BROTLI_TRUE;
}
740
/* Fills |params| with the library defaults. lgblock stays 0 here; the real
   block size is computed later by ComputeLgBlock in EnsureInitialized. */
static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
  params->mode = BROTLI_DEFAULT_MODE;
  params->large_window = BROTLI_FALSE;
  params->quality = BROTLI_DEFAULT_QUALITY;
  params->lgwin = BROTLI_DEFAULT_WINDOW;
  params->lgblock = 0;
  params->stream_offset = 0;
  params->size_hint = 0;
  params->disable_literal_context_modeling = BROTLI_FALSE;
  BrotliInitSharedEncoderDictionary(&params->dictionary);
  /* Default distance alphabet: no postfix bits, no direct codes. */
  params->dist.distance_postfix_bits = 0;
  params->dist.num_direct_distance_codes = 0;
  params->dist.alphabet_size_max =
      BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
  params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
  params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
758
/* Releases resources owned by |params| (the attached shared dictionary). */
static void BrotliEncoderCleanupParams(MemoryManager* m,
    BrotliEncoderParams* params) {
  BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
}
763
/* Brings a freshly allocated encoder state into a well-defined initial
   state. Performs no heap allocation; all buffers start empty / NULL. */
static void BrotliEncoderInitState(BrotliEncoderState* s) {
  BrotliEncoderInitParams(&s->params);
  s->input_pos_ = 0;
  s->num_commands_ = 0;
  s->num_literals_ = 0;
  s->last_insert_len_ = 0;
  s->last_flush_pos_ = 0;
  s->last_processed_pos_ = 0;
  s->prev_byte_ = 0;
  s->prev_byte2_ = 0;
  s->storage_size_ = 0;
  s->storage_ = 0;
  HasherInit(&s->hasher_);
  s->large_table_ = NULL;
  s->large_table_size_ = 0;
  s->one_pass_arena_ = NULL;
  s->two_pass_arena_ = NULL;
  s->command_buf_ = NULL;
  s->literal_buf_ = NULL;
  s->next_out_ = NULL;
  s->available_out_ = 0;
  s->total_out_ = 0;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->is_last_block_emitted_ = BROTLI_FALSE;
  s->is_initialized_ = BROTLI_FALSE;

  RingBufferInit(&s->ringbuffer_);

  s->commands_ = 0;
  s->cmd_alloc_size_ = 0;

  /* Initialize distance cache. */
  s->dist_cache_[0] = 4;
  s->dist_cache_[1] = 11;
  s->dist_cache_[2] = 15;
  s->dist_cache_[3] = 16;
  /* Save the state of the distance cache in case we need to restore it for
     emitting an uncompressed block. */
  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
804
BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,brotli_free_func free_func,void * opaque)805 BrotliEncoderState* BrotliEncoderCreateInstance(
806 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
807 BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc(
808 sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
809 if (state == NULL) {
810 /* BROTLI_DUMP(); */
811 return 0;
812 }
813 BrotliInitMemoryManager(
814 &state->memory_manager_, alloc_func, free_func, opaque);
815 BrotliEncoderInitState(state);
816 return state;
817 }
818
/* Frees every heap resource owned by |s| (but not |s| itself). When the
   memory manager is already in the OOM state, all of its outstanding
   allocations are wiped wholesale instead of freed individually. */
static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;
  if (BROTLI_IS_OOM(m)) {
    BrotliWipeOutMemoryManager(m);
    return;
  }
  BROTLI_FREE(m, s->storage_);
  BROTLI_FREE(m, s->commands_);
  RingBufferFree(m, &s->ringbuffer_);
  DestroyHasher(m, &s->hasher_);
  BROTLI_FREE(m, s->large_table_);
  BROTLI_FREE(m, s->one_pass_arena_);
  BROTLI_FREE(m, s->two_pass_arena_);
  BROTLI_FREE(m, s->command_buf_);
  BROTLI_FREE(m, s->literal_buf_);
  BrotliEncoderCleanupParams(m, &s->params);
}
836
837 /* Deinitializes and frees BrotliEncoderState instance. */
BrotliEncoderDestroyInstance(BrotliEncoderState * state)838 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
839 if (!state) {
840 return;
841 } else {
842 BrotliEncoderCleanupState(state);
843 BrotliBootstrapFree(state, &state->memory_manager_);
844 }
845 }
846
/*
   Copies the given input data to the internal ring buffer of the compressor.
   No processing of the data occurs at this time and this function can be
   called multiple times before calling WriteBrotliData() to process the
   accumulated input. At most input_block_size() bytes of input data can be
   copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
*/
static void CopyInputToRingBuffer(BrotliEncoderState* s,
                                  const size_t input_size,
                                  const uint8_t* input_buffer) {
  RingBuffer* ringbuffer_ = &s->ringbuffer_;
  MemoryManager* m = &s->memory_manager_;
  RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
  /* On OOM, leave input_pos_ unchanged; the error is reported via |m|. */
  if (BROTLI_IS_OOM(m)) return;
  s->input_pos_ += input_size;

  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
     hashing not depend on uninitialized data. This makes compression
     deterministic and it prevents uninitialized memory warnings in Valgrind.
     Even without erasing, the output would be valid (but nondeterministic).

     Background information: The compressor stores short (at most 8 bytes)
     substrings of the input already read in a hash table, and detects
     repetitions by looking up such substrings in the hash table. If it
     can find a substring, it checks whether the substring is really there
     in the ring buffer (or it's just a hash collision). Should the hash
     table become corrupt, this check makes sure that the output is
     still valid, albeit the compression ratio would be bad.

     The compressor populates the hash table from the ring buffer as it's
     reading new bytes from the input. However, at the last few indexes of
     the ring buffer, there are not enough bytes to build full-length
     substrings from. Since the hash table always contains full-length
     substrings, we erase with dummy zeros here to make sure that those
     substrings will contain zeros at the end instead of uninitialized
     data.

     Please note that erasing is not necessary (because the
     memory region is already initialized since the ring buffer
     has a `tail' that holds a copy of the beginning,) so we
     skip erasing if we have already gone around at least once in
     the ring buffer.

     Only clear during the first round of ring-buffer writes. On
     subsequent rounds data in the ring-buffer would be affected. */
  if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
    /* This is the first time when the ring buffer is being written.
       We clear 7 bytes just after the bytes that have been copied from
       the input buffer.

       The ring-buffer has a "tail" that holds a copy of the beginning,
       but only once the ring buffer has been fully written once, i.e.,
       pos <= mask. For the first time, we need to write values
       in this tail (where index may be larger than mask), so that
       we have exactly defined behavior and don't read uninitialized
       memory. Due to performance reasons, hashing reads data using a
       LOAD64, which can go 7 bytes beyond the bytes written in the
       ring-buffer. */
    memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
  }
}
908
909 /* Marks all input as processed.
910 Returns true if position wrapping occurs. */
UpdateLastProcessedPos(BrotliEncoderState * s)911 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
912 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
913 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
914 s->last_processed_pos_ = s->input_pos_;
915 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
916 }
917
/* Tries to extend the copy part of the most recently emitted command over the
   newly arrived input (|*bytes| of them, starting at
   |*wrapped_last_processed_pos|) as long as those bytes keep matching at the
   command's backward distance. Extended bytes are consumed from |*bytes| and
   the wrapped position advances accordingly; finally the command's length
   code is re-derived to reflect the longer copy. */
static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
                              uint32_t* wrapped_last_processed_pos) {
  Command* last_command = &s->commands_[s->num_commands_ - 1];
  const uint8_t* data = s->ringbuffer_.buffer_;
  const uint32_t mask = s->ringbuffer_.mask_;
  uint64_t max_backward_distance =
      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
  /* Low 25 bits hold the copy length proper; the rest is a length-code
     adjustment (see the GetLengthCode call below). */
  uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
  uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
  uint64_t max_distance = last_processed_pos < max_backward_distance ?
      last_processed_pos : max_backward_distance;
  uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
  uint32_t distance_code = CommandRestoreDistanceCode(last_command,
                                                      &s->params.dist);
  const CompoundDictionary* dict = &s->params.dictionary.compound;
  size_t compound_dictionary_size = dict->total_size;
  /* Only extend when the command's distance equals the most recent cached
     distance (either via a short code or explicitly). */
  if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
      distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
    if (cmd_dist <= max_distance) {
      /* In-window match: extend byte-by-byte while input repeats the data
         |cmd_dist| positions back in the ring buffer. */
      while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
             data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
        last_command->copy_len_++;
        (*bytes)--;
        (*wrapped_last_processed_pos)++;
      }
    } else {
      /* Distance reaches beyond the window, i.e. into the compound
         dictionary (addresses past the window map into the dictionary,
         end-aligned — NOTE(review): verify against the compound-dictionary
         addressing scheme). */
      if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
          last_copy_len < cmd_dist - max_distance) {
        size_t address =
            compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
            (size_t)last_copy_len;
        size_t br_index = 0;
        size_t br_offset;
        const uint8_t* chunk;
        size_t chunk_length;
        /* Locate the chunk containing |address|. */
        while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
        br_offset = address - dict->chunk_offsets[br_index];
        chunk = dict->chunk_source[br_index];
        chunk_length =
            dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
        while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
               chunk[br_offset]) {
          last_command->copy_len_++;
          (*bytes)--;
          (*wrapped_last_processed_pos)++;
          /* Cross into the next chunk, or stop at the dictionary's end. */
          if (++br_offset == chunk_length) {
            br_index++;
            br_offset = 0;
            if (br_index != dict->num_chunks) {
              chunk = dict->chunk_source[br_index];
              chunk_length = dict->chunk_offsets[br_index + 1] -
                  dict->chunk_offsets[br_index];
            } else {
              break;
            }
          }
        }
      }
    }
    /* The copy length is at most the metablock size, and thus expressible. */
    GetLengthCode(last_command->insert_len_,
                  (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
                           (int)(last_command->copy_len_ >> 25)),
                  TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
                  &last_command->cmd_prefix_);
  }
}
985
/*
   Processes the accumulated input data and sets |*out_size| to the length of
   the new output meta-block, or to zero if no new output meta-block has been
   created (in this case the processed input data is buffered internally).
   If |*out_size| is positive, |*output| points to the start of the output
   data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
   always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
   to 7 bits of the last byte of output. To force encoder to dump the remaining
   bits use WriteMetadata() to append an empty meta-data block.
   Returns BROTLI_FALSE if the size of the input data is larger than
   input_block_size().
*/
static BROTLI_BOOL EncodeData(
    BrotliEncoderState* s, const BROTLI_BOOL is_last,
    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
  const uint64_t delta = UnprocessedInputSize(s);
  uint32_t bytes = (uint32_t)delta;
  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
  uint8_t* data;
  uint32_t mask;
  MemoryManager* m = &s->memory_manager_;
  ContextType literal_context_mode;
  ContextLut literal_context_lut;

  data = s->ringbuffer_.buffer_;
  mask = s->ringbuffer_.mask_;

  /* NOTE(review): attached dictionary presumably caps the usable quality;
     confirm semantics of dictionary.max_quality. */
  if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
  /* Adding more blocks after "last" block is forbidden. */
  if (s->is_last_block_emitted_) return BROTLI_FALSE;
  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;

  if (delta > InputBlockSize(s)) {
    return BROTLI_FALSE;
  }
  /* Lazily allocate the scratch buffers used only by the two-pass quality. */
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
      !s->command_buf_) {
    s->command_buf_ =
        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
    s->literal_buf_ =
        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
        BROTLI_IS_NULL(s->literal_buf_)) {
      return BROTLI_FALSE;
    }
  }

  /* Fast path: qualities 0/1 compress the fragment directly into the bit
     stream without building a command list. */
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    uint8_t* storage;
    size_t storage_ix = s->last_bytes_bits_;
    size_t table_size;
    int* table;

    if (delta == 0 && !is_last) {
      /* We have no new input data and we don't have to finish the stream, so
         nothing to do. */
      *out_size = 0;
      return BROTLI_TRUE;
    }
    storage = GetBrotliStorage(s, 2 * bytes + 503);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    /* Seed the storage with the pending tail bits of the previous block. */
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    table = GetHashTable(s, s->params.quality, bytes, &table_size);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      BrotliCompressFragmentFast(
          s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    } else {
      BrotliCompressFragmentTwoPass(
          s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
          bytes, is_last,
          s->command_buf_, s->literal_buf_,
          table, table_size,
          &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    }
    /* Remember the incomplete trailing byte (0-7 bits) for the next block. */
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    UpdateLastProcessedPos(s);
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }

  {
    /* Theoretical max number of commands is 1 per 2 bytes. */
    size_t newsize = s->num_commands_ + bytes / 2 + 1;
    if (newsize > s->cmd_alloc_size_) {
      Command* new_commands;
      /* Reserve a bit more memory to allow merging with a next block
         without reallocation: that would impact speed. */
      newsize += (bytes / 4) + 16;
      s->cmd_alloc_size_ = newsize;
      new_commands = BROTLI_ALLOC(m, Command, newsize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
      if (s->commands_) {
        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
        BROTLI_FREE(m, s->commands_);
      }
      s->commands_ = new_commands;
    }
  }

  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
                              wrapped_last_processed_pos, bytes, is_last);

  literal_context_mode = ChooseContextMode(
      &s->params, data, WrapPosition(s->last_flush_pos_),
      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
  literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);

  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

  /* If the previous block ended in a copy (no trailing insert), the last
     command may be extendable over the new bytes. */
  if (s->num_commands_ && s->last_insert_len_ == 0) {
    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
  }

  /* Build backward references (commands) for the new bytes; the algorithm
     depends on quality (Zopfli variants require hasher type 10). */
  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
    BROTLI_DCHECK(s->params.hasher.type == 10);
    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  } else {
    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
        data, mask, literal_context_lut, &s->params,
        &s->hasher_, s->dist_cache_,
        &s->last_insert_len_, &s->commands_[s->num_commands_],
        &s->num_commands_, &s->num_literals_);
  }

  {
    const size_t max_length = MaxMetablockSize(&s->params);
    const size_t max_literals = max_length / 8;
    const size_t max_commands = max_length / 8;
    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
    /* If maximal possible additional block doesn't fit metablock, flush now. */
    /* TODO(eustas): Postpone decision until next block arrives? */
    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
        processed_bytes + InputBlockSize(s) <= max_length);
    /* If block splitting is not used, then flush as soon as there is some
       amount of commands / literals produced. */
    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
    if (!is_last && !force_flush && !should_flush &&
        next_input_fits_metablock &&
        s->num_literals_ < max_literals &&
        s->num_commands_ < max_commands) {
      /* Merge with next input block. Everything will happen later. */
      if (UpdateLastProcessedPos(s)) {
        HasherReset(&s->hasher_);
      }
      *out_size = 0;
      return BROTLI_TRUE;
    }
  }

  /* Create the last insert-only command. */
  if (s->last_insert_len_ > 0) {
    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
    s->num_literals_ += s->last_insert_len_;
    s->last_insert_len_ = 0;
  }

  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
    /* We have no new input data and we don't have to finish the stream, so
       nothing to do. */
    *out_size = 0;
    return BROTLI_TRUE;
  }
  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
  {
    /* Emit one meta-block covering everything since the last flush. */
    const uint32_t metablock_size =
        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
    size_t storage_ix = s->last_bytes_bits_;
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    storage[0] = (uint8_t)s->last_bytes_;
    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
    WriteMetaBlockInternal(
        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
        s->dist_cache_, &storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    /* Carry the incomplete trailing byte (0-7 bits) into the next block. */
    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
    s->last_bytes_bits_ = storage_ix & 7u;
    s->last_flush_pos_ = s->input_pos_;
    if (UpdateLastProcessedPos(s)) {
      HasherReset(&s->hasher_);
    }
    /* Remember the last two bytes for literal context modeling of the next
       meta-block. */
    if (s->last_flush_pos_ > 0) {
      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
    }
    if (s->last_flush_pos_ > 1) {
      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
    }
    s->num_commands_ = 0;
    s->num_literals_ = 0;
    /* Save the state of the distance cache in case we need to restore it for
       emitting an uncompressed block. */
    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return BROTLI_TRUE;
  }
}
1212
/* Dumps remaining output bits and metadata header to |header|.
   Returns number of produced bytes.
   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
   REQUIRED: |block_size| <= (1 << 24). */
static size_t WriteMetadataHeader(
    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
  size_t storage_ix;
  storage_ix = s->last_bytes_bits_;
  /* Flush the pending tail bits of the previous block first, then continue
     writing at the current bit position. */
  header[0] = (uint8_t)s->last_bytes_;
  header[1] = (uint8_t)(s->last_bytes_ >> 8);
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;

  /* Meta-data meta-block header (RFC 7932, section 9.2). */
  BrotliWriteBits(1, 0, &storage_ix, header);  /* ISLAST = 0 */
  BrotliWriteBits(2, 3, &storage_ix, header);  /* MNIBBLES = 0 (escape) */
  BrotliWriteBits(1, 0, &storage_ix, header);  /* reserved, must be 0 */
  if (block_size == 0) {
    BrotliWriteBits(2, 0, &storage_ix, header);  /* MSKIPBYTES = 0 */
  } else {
    /* Smallest byte count able to hold (block_size - 1). */
    uint32_t nbits = (block_size == 1) ? 0 :
        (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
    uint32_t nbytes = (nbits + 7) / 8;
    BrotliWriteBits(2, nbytes, &storage_ix, header);  /* MSKIPBYTES */
    BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
  }
  /* Metadata body is byte-aligned: round up to a whole byte. */
  return (storage_ix + 7u) >> 3;
}
1240
/* One-shot quality-10 (HQ Zopfli) compression of |input_buffer| into
   |encoded_buffer|. On entry |*encoded_size| is the output capacity; on
   success it is set to the number of bytes produced. Returns BROTLI_FALSE on
   OOM or if the output did not fit. All working memory is owned by a local
   memory manager and released before returning (wiped wholesale on OOM). */
static BROTLI_NOINLINE BROTLI_BOOL BrotliCompressBufferQuality10(
    int lgwin, size_t input_size, const uint8_t* input_buffer,
    size_t* encoded_size, uint8_t* encoded_buffer) {
  MemoryManager* m =
      (MemoryManager*)BrotliBootstrapAlloc(sizeof(MemoryManager), 0, 0, 0);

  /* The whole input is addressed linearly; no ring buffer is used here. */
  const size_t mask = BROTLI_SIZE_MAX >> 1;
  int dist_cache[4] = { 4, 11, 15, 16 };
  int saved_dist_cache[4] = { 4, 11, 15, 16 };
  BROTLI_BOOL ok = BROTLI_TRUE;
  const size_t max_out_size = *encoded_size;
  size_t total_out_size = 0;
  uint16_t last_bytes;
  uint8_t last_bytes_bits;

  /* Only the window-reachable prefix needs to be hashed. */
  const size_t hasher_eff_size = BROTLI_MIN(size_t,
      input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);

  const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
  size_t max_block_size;
  const size_t max_metablock_size = (size_t)1 << lgmetablock;
  const size_t max_literals_per_metablock = max_metablock_size / 8;
  const size_t max_commands_per_metablock = max_metablock_size / 8;
  size_t metablock_start = 0;
  uint8_t prev_byte = 0;
  uint8_t prev_byte2 = 0;

  /* Two params slots: [0] is the master copy, [1] is a per-metablock
     shallow copy that BrotliBuildMetaBlock may adjust. */
  BrotliEncoderParams* params = NULL;
  Hasher* hasher = NULL;

  if (m == NULL) return BROTLI_FALSE;
  BrotliInitMemoryManager(m, 0, 0, 0);
  params = BROTLI_ALLOC(m, BrotliEncoderParams, 2);
  hasher = BROTLI_ALLOC(m, Hasher, 1);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(params) || BROTLI_IS_NULL(hasher)) {
    goto oom;
  }
  BrotliEncoderInitParams(params);
  HasherInit(hasher);

  params->quality = 10;
  params->lgwin = lgwin;
  if (lgwin > BROTLI_MAX_WINDOW_BITS) {
    params->large_window = BROTLI_TRUE;
  }
  SanitizeParams(params);
  params->lgblock = ComputeLgBlock(params);
  ChooseDistanceParams(params);
  max_block_size = (size_t)1 << params->lgblock;

  /* Since default static dictionary is used we assume that
   * params->quality < params->dictionary.max_quality. */

  BROTLI_DCHECK(input_size <= mask + 1);
  EncodeWindowBits(lgwin, params->large_window, &last_bytes, &last_bytes_bits);
  InitOrStitchToPreviousBlock(m, hasher, input_buffer, mask, params,
                              0, hasher_eff_size, BROTLI_TRUE);
  if (BROTLI_IS_OOM(m)) goto oom;

  /* Process the input one meta-block at a time. */
  while (ok && metablock_start < input_size) {
    const size_t metablock_end =
        BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
    const size_t expected_num_commands =
        (metablock_end - metablock_start) / 12 + 16;
    Command* commands = 0;
    size_t num_commands = 0;
    size_t last_insert_len = 0;
    size_t num_literals = 0;
    size_t metablock_size = 0;
    size_t cmd_alloc_size = 0;
    BROTLI_BOOL is_last;
    uint8_t* storage;
    size_t storage_ix;

    ContextType literal_context_mode = ChooseContextMode(params,
        input_buffer, metablock_start, mask, metablock_end - metablock_start);
    ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);

    /* Build commands block-by-block via Zopfli shortest-path search. */
    size_t block_start;
    for (block_start = metablock_start; block_start < metablock_end; ) {
      size_t block_size =
          BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
      ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
      size_t path_size;
      size_t new_cmd_alloc_size;
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
      BrotliInitZopfliNodes(nodes, block_size + 1);
      StitchToPreviousBlockH10(&hasher->privat._H10, block_size, block_start,
                               input_buffer, mask);
      path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
          input_buffer, mask, literal_context_lut, params, dist_cache, hasher,
          nodes);
      if (BROTLI_IS_OOM(m)) goto oom;
      /* We allocate a command buffer in the first iteration of this loop that
         will be likely big enough for the whole metablock, so that for most
         inputs we will not have to reallocate in later iterations. We do the
         allocation here and not before the loop, because if the input is small,
         this will be allocated after the Zopfli cost model is freed, so this
         will not increase peak memory usage.
         TODO(eustas): If the first allocation is too small, increase command
         buffer size exponentially. */
      new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
                                      num_commands + path_size + 1);
      if (cmd_alloc_size != new_cmd_alloc_size) {
        Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom;
        cmd_alloc_size = new_cmd_alloc_size;
        if (commands) {
          memcpy(new_commands, commands, sizeof(Command) * num_commands);
          BROTLI_FREE(m, commands);
        }
        commands = new_commands;
      }
      BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
          &last_insert_len, params, &commands[num_commands], &num_literals);
      num_commands += path_size;
      block_start += block_size;
      metablock_size += block_size;
      BROTLI_FREE(m, nodes);
      /* Close the meta-block early once it has enough symbols. */
      if (num_literals > max_literals_per_metablock ||
          num_commands > max_commands_per_metablock) {
        break;
      }
    }

    if (last_insert_len > 0) {
      InitInsertCommand(&commands[num_commands++], last_insert_len);
      num_literals += last_insert_len;
    }

    is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
    storage = NULL;
    storage_ix = last_bytes_bits;

    if (metablock_size == 0) {
      /* Write the ISLAST and ISEMPTY bits. */
      storage = BROTLI_ALLOC(m, uint8_t, 16);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
      storage[0] = (uint8_t)last_bytes;
      storage[1] = (uint8_t)(last_bytes >> 8);
      BrotliWriteBits(2, 3, &storage_ix, storage);
      storage_ix = (storage_ix + 7u) & ~7u;
    } else if (!ShouldCompress(input_buffer, mask, metablock_start,
                               metablock_size, num_literals, num_commands)) {
      /* Restore the distance cache, as its last update by
         CreateBackwardReferences is now unused. */
      memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
      storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
      storage[0] = (uint8_t)last_bytes;
      storage[1] = (uint8_t)(last_bytes >> 8);
      BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
                                       metablock_start, mask, metablock_size,
                                       &storage_ix, storage);
    } else {
      MetaBlockSplit mb;
      BrotliEncoderParams* block_params = params + 1;
      *block_params = *params;  /* shallow copy */
      InitMetaBlockSplit(&mb);
      BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
                           block_params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) goto oom;
      {
        /* The number of distance symbols effectively used for distance
           histograms. It might be less than distance alphabet size
           for "Large Window Brotli" (32-bit). */
        BrotliOptimizeHistograms(block_params->dist.alphabet_size_limit, &mb);
      }
      storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
      storage[0] = (uint8_t)last_bytes;
      storage[1] = (uint8_t)(last_bytes >> 8);
      BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
                           mask, prev_byte, prev_byte2,
                           is_last,
                           block_params,
                           literal_context_mode,
                           commands, num_commands,
                           &mb,
                           &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) goto oom;
      /* If compression did not pay off, fall back to an uncompressed block. */
      if (metablock_size + 4 < (storage_ix >> 3)) {
        /* Restore the distance cache and last byte. */
        memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
        storage[0] = (uint8_t)last_bytes;
        storage[1] = (uint8_t)(last_bytes >> 8);
        storage_ix = last_bytes_bits;
        BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
                                         metablock_start, mask,
                                         metablock_size, &storage_ix, storage);
      }
      DestroyMetaBlockSplit(m, &mb);
    }
    /* Carry the incomplete trailing byte into the next meta-block. */
    last_bytes = (uint16_t)(storage[storage_ix >> 3]);
    last_bytes_bits = storage_ix & 7u;
    metablock_start += metablock_size;
    if (metablock_start < input_size) {
      prev_byte = input_buffer[metablock_start - 1];
      prev_byte2 = input_buffer[metablock_start - 2];
    }
    /* Save the state of the distance cache in case we need to restore it for
       emitting an uncompressed block. */
    memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));

    {
      const size_t out_size = storage_ix >> 3;
      total_out_size += out_size;
      if (total_out_size <= max_out_size) {
        memcpy(encoded_buffer, storage, out_size);
        encoded_buffer += out_size;
      } else {
        ok = BROTLI_FALSE;
      }
    }
    BROTLI_FREE(m, storage);
    BROTLI_FREE(m, commands);
  }

  *encoded_size = total_out_size;
  DestroyHasher(m, hasher);
  BROTLI_FREE(m, hasher);
  BrotliEncoderCleanupParams(m, params);
  BROTLI_FREE(m, params);
  BrotliBootstrapFree(m, m);
  return ok;

oom:
  /* The manager tracks every live allocation; wipe them all at once. */
  BrotliWipeOutMemoryManager(m);
  BrotliBootstrapFree(m, m);
  return BROTLI_FALSE;
}
1476
/* Upper bound on the compressed size for |input_size| input bytes, assuming
   the worst case of storing everything uncompressed:
   [window bits / empty metadata] + N * [uncompressed header] + [last empty].
   Returns 0 if the bound does not fit in size_t. */
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
  size_t num_large_blocks;
  size_t overhead;
  size_t bound;
  if (input_size == 0) return 2;  /* Single ISLAST+ISEMPTY byte. */
  num_large_blocks = input_size >> 14;  /* One 4-byte header per 16KiB. */
  overhead = 2 + (4 * num_large_blocks) + 3 + 1;
  bound = input_size + overhead;
  /* Overflow check: a wrapped sum is smaller than the input size. */
  return (bound < input_size) ? 0 : bound;
}
1485
1486 /* Wraps data to uncompressed brotli stream with minimal window size.
1487 |output| should point at region with at least BrotliEncoderMaxCompressedSize
1488 addressable bytes.
1489 Returns the length of stream. */
MakeUncompressedStream(const uint8_t * input,size_t input_size,uint8_t * output)1490 static size_t MakeUncompressedStream(
1491 const uint8_t* input, size_t input_size, uint8_t* output) {
1492 size_t size = input_size;
1493 size_t result = 0;
1494 size_t offset = 0;
1495 if (input_size == 0) {
1496 output[0] = 6;
1497 return 1;
1498 }
1499 output[result++] = 0x21; /* window bits = 10, is_last = false */
1500 output[result++] = 0x03; /* empty metadata, padding */
1501 while (size > 0) {
1502 uint32_t nibbles = 0;
1503 uint32_t chunk_size;
1504 uint32_t bits;
1505 chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
1506 if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
1507 bits =
1508 (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
1509 output[result++] = (uint8_t)bits;
1510 output[result++] = (uint8_t)(bits >> 8);
1511 output[result++] = (uint8_t)(bits >> 16);
1512 if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
1513 memcpy(&output[result], &input[offset], chunk_size);
1514 result += chunk_size;
1515 offset += chunk_size;
1516 size -= chunk_size;
1517 }
1518 output[result++] = 3;
1519 return result;
1520 }
1521
/* One-shot compression entry point. Compresses |input_size| bytes of
   |input_buffer| into |encoded_buffer| with the given quality/window/mode.
   On entry |*encoded_size| is the output capacity; on success it holds the
   produced size. If compression fails or expands the data past the documented
   bound, falls back to wrapping the input uncompressed (when it fits).
   Returns BROTLI_FALSE when nothing valid could be written. */
BROTLI_BOOL BrotliEncoderCompress(
    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
    size_t* encoded_size,
    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
  BrotliEncoderState* s;
  size_t out_size = *encoded_size;
  /* Keep the original pointers: the fallback path re-reads the input and
     rewrites the output from the start. */
  const uint8_t* input_start = input_buffer;
  uint8_t* output_start = encoded_buffer;
  size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
  if (out_size == 0) {
    /* Output buffer needs at least one byte. */
    return BROTLI_FALSE;
  }
  if (input_size == 0) {
    /* Handle the special case of empty input. */
    *encoded_size = 1;
    *encoded_buffer = 6;  /* ISLAST + ISEMPTY */
    return BROTLI_TRUE;
  }
  if (quality == 10) {
    /* TODO(eustas): Implement this direct path for all quality levels. */
    const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
                                  BROTLI_MAX(int, 16, lgwin));
    int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
                                           encoded_size, encoded_buffer);
    if (!ok || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }

  /* Generic path: run the streaming encoder over the whole buffer at once. */
  s = BrotliEncoderCreateInstance(0, 0, 0);
  if (!s) {
    return BROTLI_FALSE;
  } else {
    size_t available_in = input_size;
    const uint8_t* next_in = input_buffer;
    size_t available_out = *encoded_size;
    uint8_t* next_out = encoded_buffer;
    size_t total_out = 0;
    BROTLI_BOOL result = BROTLI_FALSE;
    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
      BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
    }
    result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
        &available_in, &next_in, &available_out, &next_out, &total_out);
    /* An unfinished stream means the output buffer was too small. */
    if (!BrotliEncoderIsFinished(s)) result = 0;
    *encoded_size = total_out;
    BrotliEncoderDestroyInstance(s);
    if (!result || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }
fallback:
  *encoded_size = 0;
  /* max_out_size == 0 means the bound itself overflowed size_t. */
  if (!max_out_size) return BROTLI_FALSE;
  if (out_size >= max_out_size) {
    *encoded_size =
        MakeUncompressedStream(input_start, input_size, output_start);
    return BROTLI_TRUE;
  }
  return BROTLI_FALSE;
}
1591
/* Seals the stream at a byte boundary by emitting an empty metadata block
   after the pending partial byte. The sealed bytes are appended to the
   current internal output (or staged in the tiny buffer if there is none). */
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
  uint32_t tail = s->last_bytes_;
  size_t tail_bits = s->last_bytes_bits_;
  size_t num_bytes;
  uint8_t* out;
  s->last_bytes_ = 0;
  s->last_bytes_bits_ = 0;
  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
  tail |= 0x6u << tail_bits;
  tail_bits += 6;
  /* If we have already created storage, then append to it.
     Storage is valid until next block is being compressed. */
  if (s->next_out_ == NULL) {
    s->next_out_ = s->tiny_buf_.u8;
    out = s->tiny_buf_.u8;
  } else {
    out = s->next_out_ + s->available_out_;
  }
  num_bytes = (tail_bits + 7) >> 3;  /* tail_bits <= 13, so at most 2 here. */
  out[0] = (uint8_t)tail;
  if (num_bytes > 1) out[1] = (uint8_t)(tail >> 8);
  if (num_bytes > 2) out[2] = (uint8_t)(tail >> 16);
  s->available_out_ += num_bytes;
}
1614
1615 /* Injects padding bits or pushes compressed data to output.
1616 Returns false if nothing is done. */
InjectFlushOrPushOutput(BrotliEncoderState * s,size_t * available_out,uint8_t ** next_out,size_t * total_out)1617 static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1618 size_t* available_out, uint8_t** next_out, size_t* total_out) {
1619 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1620 s->last_bytes_bits_ != 0) {
1621 InjectBytePaddingBlock(s);
1622 return BROTLI_TRUE;
1623 }
1624
1625 if (s->available_out_ != 0 && *available_out != 0) {
1626 size_t copy_output_size =
1627 BROTLI_MIN(size_t, s->available_out_, *available_out);
1628 memcpy(*next_out, s->next_out_, copy_output_size);
1629 *next_out += copy_output_size;
1630 *available_out -= copy_output_size;
1631 s->next_out_ += copy_output_size;
1632 s->available_out_ -= copy_output_size;
1633 s->total_out_ += copy_output_size;
1634 if (total_out) *total_out = s->total_out_;
1635 return BROTLI_TRUE;
1636 }
1637
1638 return BROTLI_FALSE;
1639 }
1640
/* Returns the stream to the default state once a requested flush has been
   fully drained; also invalidates the stale internal output pointer. */
static void CheckFlushComplete(BrotliEncoderState* s) {
  if (s->stream_state_ != BROTLI_STREAM_FLUSH_REQUESTED) return;
  if (s->available_out_ != 0) return;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->next_out_ = 0;
}
1648
/* Streaming driver for the fast compressors (qualities 0 and 1).
   Consumes input in blocks of at most min(2^lgwin, *available_in) bytes,
   compresses each block with BrotliCompressFragmentFast (one-pass) or
   BrotliCompressFragmentTwoPass, and emits the result either directly into
   the client buffer (when it is large enough) or via internal storage.
   Returns BROTLI_FALSE on wrong quality or OOM, BROTLI_TRUE otherwise. */
static BROTLI_BOOL BrotliEncoderCompressStreamFast(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
      BROTLI_MIN(size_t, *available_in, block_size_limit));
  uint32_t* tmp_command_buf = NULL;
  uint32_t* command_buf = NULL;
  uint8_t* tmp_literal_buf = NULL;
  uint8_t* literal_buf = NULL;
  MemoryManager* m = &s->memory_manager_;
  /* This path only serves the two fast quality levels. */
  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Full-sized command/literal buffers are cached on the state; smaller
       one-shot buffers are allocated per call and freed before returning. */
    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
      s->command_buf_ =
          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
      s->literal_buf_ =
          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
          BROTLI_IS_NULL(s->literal_buf_)) {
        return BROTLI_FALSE;
      }
    }
    if (s->command_buf_) {
      command_buf = s->command_buf_;
      literal_buf = s->literal_buf_;
    } else {
      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
          BROTLI_IS_NULL(tmp_literal_buf)) {
        return BROTLI_FALSE;
      }
      command_buf = tmp_command_buf;
      literal_buf = tmp_literal_buf;
    }
  }

  while (BROTLI_TRUE) {
    /* First drain padding / pending internal output, if any. */
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }

    /* Compress block only when internal output buffer is empty, stream is not
       finished, there is no pending flush request, and there is either
       additional input or pending operation. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
      BROTLI_BOOL is_last =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
      BROTLI_BOOL force_flush =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
      /* Worst-case output for one block (incompressible data + headers). */
      size_t max_out_size = 2 * block_size + 503;
      BROTLI_BOOL inplace = BROTLI_TRUE;
      uint8_t* storage = NULL;
      size_t storage_ix = s->last_bytes_bits_;
      size_t table_size;
      int* table;

      if (force_flush && block_size == 0) {
        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        continue;
      }
      /* Write straight into the client buffer when it is provably large
         enough; otherwise compress into internal storage and copy later. */
      if (max_out_size <= *available_out) {
        storage = *next_out;
      } else {
        inplace = BROTLI_FALSE;
        storage = GetBrotliStorage(s, max_out_size);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      /* Seed storage with the partial byte left over from the last block. */
      storage[0] = (uint8_t)s->last_bytes_;
      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
      table = GetHashTable(s, s->params.quality, block_size, &table_size);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
        BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
            is_last, table, table_size, &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      } else {
        BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
            is_last, command_buf, literal_buf, table, table_size,
            &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      if (block_size != 0) {
        *next_in += block_size;
        *available_in -= block_size;
      }
      if (inplace) {
        /* Data already sits in the client buffer; just advance pointers. */
        size_t out_bytes = storage_ix >> 3;
        BROTLI_DCHECK(out_bytes <= *available_out);
        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
        *next_out += out_bytes;
        *available_out -= out_bytes;
        s->total_out_ += out_bytes;
        if (total_out) *total_out = s->total_out_;
      } else {
        /* Expose internal storage; drained by InjectFlushOrPushOutput. */
        size_t out_bytes = storage_ix >> 3;
        s->next_out_ = storage;
        s->available_out_ = out_bytes;
      }
      /* Remember the trailing partial byte for the next block. */
      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
      s->last_bytes_bits_ = storage_ix & 7u;

      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
      continue;
    }
    break;
  }
  BROTLI_FREE(m, tmp_command_buf);
  BROTLI_FREE(m, tmp_literal_buf);
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
1771
/* Emits a metadata block: flushes any pending compressed data, writes the
   metadata header (via the tiny buffer), then copies the payload verbatim
   to the output. Payload is capped at 2^24 bytes (3-byte length field).
   Returns BROTLI_FALSE on contract violation or encode failure; BROTLI_TRUE
   when all possible progress was made for the given buffers. */
static BROTLI_BOOL ProcessMetadata(
    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (*available_in > (1u << 24)) return BROTLI_FALSE;
  /* Switch to metadata block workflow, if required. */
  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
  }
  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  while (BROTLI_TRUE) {
    /* Drain pending internal output first. */
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }
    if (s->available_out_ != 0) break;

    /* Flush not-yet-encoded ring-buffer input before the metadata block. */
    if (s->input_pos_ != s->last_flush_pos_) {
      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
          &s->available_out_, &s->next_out_);
      if (!result) return BROTLI_FALSE;
      continue;
    }

    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
      /* Stage the header in the tiny buffer; pushed on the next loop turn. */
      s->next_out_ = s->tiny_buf_.u8;
      s->available_out_ =
          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
      continue;
    } else {
      /* Exit workflow only when there is no more input and no more output.
         Otherwise client may continue producing empty metadata blocks. */
      if (s->remaining_metadata_bytes_ == 0) {
        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
        s->stream_state_ = BROTLI_STREAM_PROCESSING;
        break;
      }
      if (*available_out) {
        /* Directly copy input to output. */
        uint32_t copy = (uint32_t)BROTLI_MIN(
            size_t, s->remaining_metadata_bytes_, *available_out);
        memcpy(*next_out, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->remaining_metadata_bytes_ -= copy;
        *next_out += copy;
        *available_out -= copy;
      } else {
        /* This guarantees progress in "TakeOutput" workflow. */
        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
        s->next_out_ = s->tiny_buf_.u8;
        memcpy(s->next_out_, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->remaining_metadata_bytes_ -= copy;
        s->available_out_ = copy;
      }
      continue;
    }
  }

  return BROTLI_TRUE;
}
1839
/* Records a best-effort input size hint (unprocessed bytes plus the input
   still pending in the client buffer, saturated at 1 GiB) if the client
   has not supplied one explicitly. */
static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
  const uint32_t kLimit = 1u << 30;
  uint64_t buffered;
  uint64_t pending;
  if (s->params.size_hint != 0) return;  /* Client-provided hint wins. */
  buffered = UnprocessedInputSize(s);
  pending = (uint64_t)available_in;
  if ((buffered >= kLimit) || (pending >= kLimit) ||
      ((buffered + pending) >= kLimit)) {
    s->params.size_hint = kLimit;
  } else {
    s->params.size_hint = (uint32_t)(buffered + pending);
  }
}
1854
/* Public streaming entry point. Dispatches metadata emission and the fast
   quality levels, then runs the main loop: copy input into the ring buffer,
   push pending output, and encode a metablock whenever a block boundary,
   flush, or finish is reached. Returns BROTLI_FALSE on contract violation
   or encode failure. */
BROTLI_BOOL BrotliEncoderCompressStream(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
    size_t* total_out) {
  if (!EnsureInitialized(s)) return BROTLI_FALSE;

  /* Unfinished metadata block; check requirements. */
  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
  }

  if (op == BROTLI_OPERATION_EMIT_METADATA) {
    UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
    return ProcessMetadata(
        s, available_in, next_in, available_out, next_out, total_out);
  }

  /* Metadata workflow requires EMIT_METADATA operations until complete. */
  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  /* No new input is accepted after FINISH / while a flush is pending. */
  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
        available_out, next_out, total_out);
  }
  while (BROTLI_TRUE) {
    size_t remaining_block_size = RemainingInputBlockSize(s);
    /* Shorten input to flint size. */
    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
      remaining_block_size = (size_t)s->flint_;
    }

    /* Fill the current input block from the client buffer. */
    if (remaining_block_size != 0 && *available_in != 0) {
      size_t copy_input_size =
          BROTLI_MIN(size_t, remaining_block_size, *available_in);
      CopyInputToRingBuffer(s, copy_input_size, *next_in);
      *next_in += copy_input_size;
      *available_in -= copy_input_size;
      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
      continue;
    }

    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      /* Exit the "emit flint" workflow. */
      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
        CheckFlushComplete(s);
        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
          s->flint_ = BROTLI_FLINT_DONE;
        }
      }
      continue;
    }

    /* Compress data only when internal output buffer is empty, stream is not
       finished and there is no pending flush request. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
        BROTLI_BOOL result;
        /* Force emitting (uncompressed) piece containing flint. */
        if (!is_last && s->flint_ == 0) {
          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
          force_flush = BROTLI_TRUE;
        }
        UpdateSizeHint(s, *available_in);
        result = EncodeData(s, is_last, force_flush,
            &s->available_out_, &s->next_out_);
        if (!result) return BROTLI_FALSE;
        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
        continue;
      }
    }
    break;
  }
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}
1943
BrotliEncoderIsFinished(BrotliEncoderState * s)1944 BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
1945 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
1946 !BrotliEncoderHasMoreOutput(s));
1947 }
1948
BrotliEncoderHasMoreOutput(BrotliEncoderState * s)1949 BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
1950 return TO_BROTLI_BOOL(s->available_out_ != 0);
1951 }
1952
/* Hands out a pointer to up to *size bytes of pending internal output
   (all of it when *size is 0) and advances the internal cursor. On return
   *size holds the number of bytes exposed; NULL is returned when there is
   nothing to take. The pointer stays valid until the next encoder call. */
const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
  uint8_t* chunk_start = s->next_out_;
  size_t chunk_len = s->available_out_;
  if (*size != 0 && *size < chunk_len) {
    chunk_len = *size;  /* Client requested a smaller portion. */
  }
  if (chunk_len == 0) {
    *size = 0;
    return 0;
  }
  s->next_out_ += chunk_len;
  s->available_out_ -= chunk_len;
  s->total_out_ += chunk_len;
  /* Taking the last bytes may complete a pending flush. */
  CheckFlushComplete(s);
  *size = chunk_len;
  return chunk_start;
}
1971
/* Returns the library version as a hex-coded triple (see common/version.h). */
uint32_t BrotliEncoderVersion(void) {
  return BROTLI_VERSION;
}
1975
/* Builds a prepared dictionary from raw or serialized shared-dictionary
   data, wrapped in a ManagedDictionary that remembers its allocator so
   BrotliEncoderDestroyPreparedDictionary can free it later.
   Returns NULL on unsupported type, allocation failure, or invalid data.
   NOTE(review): the raw `data` is presumably required to outlive the
   returned handle for BROTLI_SHARED_DICTIONARY_RAW — confirm with callers. */
BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
    BrotliSharedDictionaryType type, size_t size,
    const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
  ManagedDictionary* managed_dictionary = NULL;
  if (type != BROTLI_SHARED_DICTIONARY_RAW &&
      type != BROTLI_SHARED_DICTIONARY_SERIALIZED) {
    return NULL;
  }
  managed_dictionary =
      BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
  if (managed_dictionary == NULL) {
    return NULL;
  }
  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
    managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
        &managed_dictionary->memory_manager_, data, size);
  } else {
    /* Serialized form: allocate and initialize a SharedEncoderDictionary. */
    SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
        &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
    managed_dictionary->dictionary = (uint32_t*)dict;
    if (dict != NULL) {
      BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
          &managed_dictionary->memory_manager_, data, size, quality, dict);
      if (!ok) {
        /* Init failed (e.g. malformed data): release and signal failure. */
        BrotliFree(&managed_dictionary->memory_manager_, dict);
        managed_dictionary->dictionary = NULL;
      }
    }
  }
  if (managed_dictionary->dictionary == NULL) {
    BrotliDestroyManagedDictionary(managed_dictionary);
    return NULL;
  }
  return (BrotliEncoderPreparedDictionary*)managed_dictionary;
}
2012
/* Releases a dictionary created by BrotliEncoderPrepareDictionary.
   Only managed dictionaries are eligible; the wrapped object is destroyed
   according to its leading magic, then the wrapper itself is freed. */
void BrotliEncoderDestroyPreparedDictionary(
    BrotliEncoderPreparedDictionary* dictionary) {
  ManagedDictionary* managed = (ManagedDictionary*)dictionary;
  if (!dictionary) return;
  /* Magic is the first field of every dictionary struct; anything without
     the managed magic was not allocated by this module. */
  if (managed->magic != kManagedDictionaryMagic) {
    return;
  }
  if (managed->dictionary != NULL) {
    uint32_t inner_magic = *managed->dictionary;
    if (inner_magic == kPreparedDictionaryMagic) {
      DestroyPreparedDictionary(
          &managed->memory_manager_, (PreparedDictionary*)managed->dictionary);
    } else if (inner_magic == kSharedDictionaryMagic) {
      BrotliCleanupSharedEncoderDictionary(&managed->memory_manager_,
          (SharedEncoderDictionary*)managed->dictionary);
      BrotliFree(&managed->memory_manager_, managed->dictionary);
    }
    /* Unknown inner magic should never ever happen; skip the payload. */
  }
  managed->dictionary = NULL;
  BrotliDestroyManagedDictionary(managed);
}
2037
/* Attaches a prepared (possibly managed) dictionary to an encoder.
   Raw prepared dictionaries are appended to the compound set; shared
   dictionaries additionally merge their contextual part, which is only
   allowed before initialization and when at most one side replaces the
   built-in default. Returns BROTLI_FALSE on type mismatch or conflict. */
BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
    const BrotliEncoderPreparedDictionary* dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* dict = dictionary;
  uint32_t magic = *((const uint32_t*)dict);
  SharedEncoderDictionary* current = NULL;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap managed dictionary. */
    ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
    magic = *managed_dictionary->dictionary;
    dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
  }
  current = &state->params.dictionary;
  if (magic == kPreparedDictionaryMagic) {
    const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
    if (!AttachPreparedDictionary(&current->compound, prepared)) {
      return BROTLI_FALSE;
    }
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* attached =
        (const SharedEncoderDictionary*)dict;
    /* "Default" means: non-contextual with the single built-in static
       dictionary (identified by its hash tables). */
    BROTLI_BOOL was_default = !current->contextual.context_based &&
        current->contextual.num_dictionaries == 1 &&
        current->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        current->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    BROTLI_BOOL new_default = !attached->contextual.context_based &&
        attached->contextual.num_dictionaries == 1 &&
        attached->contextual.dict[0]->hash_table_words ==
        kStaticDictionaryHashWords &&
        attached->contextual.dict[0]->hash_table_lengths ==
        kStaticDictionaryHashLengths;
    size_t i;
    /* Contextual replacement must happen before encoder setup. */
    if (state->is_initialized_) return BROTLI_FALSE;
    current->max_quality =
        BROTLI_MIN(int, current->max_quality, attached->max_quality);
    /* Adopt every compound chunk carried by the shared dictionary. */
    for (i = 0; i < attached->compound.num_chunks; i++) {
      if (!AttachPreparedDictionary(&current->compound,
          attached->compound.chunks[i])) {
        return BROTLI_FALSE;
      }
    }
    if (!new_default) {
      /* Only one non-default contextual dictionary may be installed. */
      if (!was_default) return BROTLI_FALSE;
      /* Copy by value, but then set num_instances_ to 0 because their memory
         is managed by attached, not by current */
      current->contextual = attached->contextual;
      current->contextual.num_instances_ = 0;
    }
  } else {
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
2093
/* Rough upper bound of encoder memory consumption for the given quality,
   window and input size. Fast qualities: state + hash table + two-pass
   buffers. Other qualities: ring buffer + hashers + command/output buffers
   + (for block-splitting qualities) a histogram estimate. */
size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
    size_t input_size) {
  BrotliEncoderParams params;
  BrotliEncoderInitParams(&params);
  params.quality = quality;
  params.lgwin = lgwin;
  params.size_hint = input_size;
  SanitizeParams(&params);
  params.lgblock = ComputeLgBlock(&params);
  ChooseHasher(&params, &params.hasher);
  if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    size_t state_size = sizeof(BrotliEncoderState);
    size_t block_size = BROTLI_MIN(size_t, input_size, (1ul << params.lgwin));
    size_t hash_table_size =
        HashTableSize(MaxHashTableSize(params.quality), block_size);
    /* Tables up to 1024 entries live in the small stack buffer. */
    size_t hash_size =
        (hash_table_size < (1u << 10)) ? 0 : sizeof(int) * hash_table_size;
    /* Two-pass keeps command (4B) + literal (1B) buffers, <= 2^17 entries. */
    size_t cmdbuf_size = params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY ?
        5 * BROTLI_MIN(size_t, block_size, 1ul << 17) : 0;
    if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
      state_size += sizeof(BrotliOnePassArena);
    } else {
      state_size += sizeof(BrotliTwoPassArena);
    }
    return hash_size + cmdbuf_size + state_size;
  } else {
    size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
    int ringbuffer_bits = ComputeRbBits(&params);
    size_t ringbuffer_size = input_size < short_ringbuffer_size ?
        input_size : (1u << ringbuffer_bits) + short_ringbuffer_size;
    size_t hash_size[4] = {0};
    size_t metablock_size =
        BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
    size_t inputblock_size =
        BROTLI_MIN(size_t, input_size, (size_t)1 << params.lgblock);
    size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
    size_t outbuf_size = metablock_size * 2 + 503;
    size_t histogram_size = 0;
    HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
    if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
      cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
          MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
    }
    if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      /* Only a very rough estimation, based on enwik8. */
      histogram_size = 200 << 20;
    } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
      size_t literal_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t command_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      size_t distance_histograms =
          BROTLI_MIN(size_t, metablock_size / 6144, 256);
      histogram_size = literal_histograms * sizeof(HistogramLiteral) +
          command_histograms * sizeof(HistogramCommand) +
          distance_histograms * sizeof(HistogramDistance);
    }
    return (ringbuffer_size +
        hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
        cmdbuf_size +
        outbuf_size +
        histogram_size);
  }
}
/* Returns the total in-memory size of a prepared (possibly managed)
   dictionary, including wrapper overhead and, for shared dictionaries,
   all compound chunks (recursively) and contextual instances.
   Returns 0 on unrecognized magic (error). */
size_t BrotliEncoderGetPreparedDictionarySize(
    const BrotliEncoderPreparedDictionary* prepared_dictionary) {
  /* First field of dictionary structs */
  const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
  uint32_t magic = *((const uint32_t*)prepared);
  size_t overhead = 0;
  if (magic == kManagedDictionaryMagic) {
    /* Unwrap the managed wrapper, accounting for its own size. */
    const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
    overhead = sizeof(ManagedDictionary);
    magic = *managed->dictionary;
    prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
  }

  if (magic == kPreparedDictionaryMagic) {
    const PreparedDictionary* dictionary =
        (const PreparedDictionary*)prepared;
    /* Keep in sync with step 3 of CreatePreparedDictionary */
    return sizeof(PreparedDictionary) + dictionary->source_size +
        (sizeof(uint32_t) << dictionary->slot_bits) +
        (sizeof(uint16_t) << dictionary->bucket_bits) +
        (sizeof(uint32_t) * dictionary->source_offset) + overhead;
  } else if (magic == kSharedDictionaryMagic) {
    const SharedEncoderDictionary* dictionary =
        (const SharedEncoderDictionary*)prepared;
    const CompoundDictionary* compound = &dictionary->compound;
    const ContextualEncoderDictionary* contextual = &dictionary->contextual;
    size_t result = sizeof(*dictionary);
    size_t i;
    size_t num_instances;
    const BrotliEncoderDictionary* instances;
    /* Compound chunks are themselves prepared dictionaries: recurse. */
    for (i = 0; i < compound->num_prepared_instances_; i++) {
      size_t size = BrotliEncoderGetPreparedDictionarySize(
          (const BrotliEncoderPreparedDictionary*)
          compound->prepared_instances_[i]);
      if (!size) return 0; /* error */
      result += size;
    }
    if (contextual->context_based) {
      num_instances = contextual->num_instances_;
      instances = contextual->instances_;
      result += sizeof(*instances) * num_instances;
    } else {
      /* Non-contextual: single embedded instance, no extra allocation. */
      num_instances = 1;
      instances = &contextual->instance_;
    }
    /* Per-instance auxiliary tables, counted only when owned (non-NULL). */
    for (i = 0; i < num_instances; i++) {
      const BrotliEncoderDictionary* dict = &instances[i];
      result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
      if (dict->hash_table_data_words_) {
        result += sizeof(kStaticDictionaryHashWords);
      }
      if (dict->hash_table_data_lengths_) {
        result += sizeof(kStaticDictionaryHashLengths);
      }
      if (dict->buckets_data_) {
        result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
      }
      if (dict->dict_words_data_) {
        result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
      }
      if (dict->words_instance_) {
        result += sizeof(*dict->words_instance_);
        /* data_size not added here: it is never allocated by the
           SharedEncoderDictionary, instead it always points to the file
           already loaded in memory. So if the caller wants to include
           this memory as well, add the size of the loaded dictionary
           file to this. */
      }
    }
    return result + overhead;
  }
  return 0; /* error */
}
2232
#if defined(BROTLI_TEST)
/* Test-only shim exposing the file-local MakeUncompressedStream helper. */
size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
size_t MakeUncompressedStreamForTest(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  return MakeUncompressedStream(input, input_size, output);
}
#endif
2240
2241 #if defined(__cplusplus) || defined(c_plusplus)
2242 } /* extern "C" */
2243 #endif
2244