xref: /aosp_15_r20/external/cronet/third_party/brotli/enc/encode.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Implementation of Brotli compressor. */
8 
9 #include <brotli/encode.h>
10 
11 #include <stdlib.h>  /* free, malloc */
12 #include <string.h>  /* memcpy, memset */
13 
14 #include "../common/constants.h"
15 #include "../common/context.h"
16 #include "../common/platform.h"
17 #include "../common/version.h"
18 #include "backward_references.h"
19 #include "backward_references_hq.h"
20 #include "bit_cost.h"
21 #include "brotli_bit_stream.h"
22 #include "compress_fragment.h"
23 #include "compress_fragment_two_pass.h"
24 #include "dictionary_hash.h"
25 #include "encoder_dict.h"
26 #include "entropy_encode.h"
27 #include "fast_log.h"
28 #include "hash.h"
29 #include "histogram.h"
30 #include "memory.h"
31 #include "metablock.h"
32 #include "prefix.h"
33 #include "quality.h"
34 #include "ringbuffer.h"
35 #include "utf8_util.h"
36 #include "write_bits.h"
37 
38 #if defined(__cplusplus) || defined(c_plusplus)
39 extern "C" {
40 #endif
41 
42 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
43 
44 typedef enum BrotliEncoderStreamState {
45   /* Default state. */
46   BROTLI_STREAM_PROCESSING = 0,
47   /* Intermediate state; after next block is emitted, byte-padding should be
48      performed before getting back to default state. */
49   BROTLI_STREAM_FLUSH_REQUESTED = 1,
50   /* Last metablock was produced; no more input is acceptable. */
51   BROTLI_STREAM_FINISHED = 2,
52   /* Flushing compressed block and writing meta-data block header. */
53   BROTLI_STREAM_METADATA_HEAD = 3,
54   /* Writing metadata block body. */
55   BROTLI_STREAM_METADATA_BODY = 4
56 } BrotliEncoderStreamState;
57 
58 typedef enum BrotliEncoderFlintState {
59   BROTLI_FLINT_NEEDS_2_BYTES = 2,
60   BROTLI_FLINT_NEEDS_1_BYTE = 1,
61   BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
62   BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
63   BROTLI_FLINT_DONE = -2
64 } BrotliEncoderFlintState;
65 
66 typedef struct BrotliEncoderStateStruct {
67   BrotliEncoderParams params;
68 
69   MemoryManager memory_manager_;
70 
71   uint64_t input_pos_;
72   RingBuffer ringbuffer_;
73   size_t cmd_alloc_size_;
74   Command* commands_;
75   size_t num_commands_;
76   size_t num_literals_;
77   size_t last_insert_len_;
78   uint64_t last_flush_pos_;
79   uint64_t last_processed_pos_;
80   int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
81   int saved_dist_cache_[4];
82   uint16_t last_bytes_;
83   uint8_t last_bytes_bits_;
84   /* "Flint" is a tiny uncompressed block emitted before the continuation
85      block to unwire literal context from previous data. Despite being int8_t,
86      the field actually holds a BrotliEncoderFlintState value. */
87   int8_t flint_;
88   uint8_t prev_byte_;
89   uint8_t prev_byte2_;
90   size_t storage_size_;
91   uint8_t* storage_;
92 
93   Hasher hasher_;
94 
95   /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
96   int small_table_[1 << 10];  /* 4KiB */
97   int* large_table_;          /* Allocated only when needed */
98   size_t large_table_size_;
99 
100   BrotliOnePassArena* one_pass_arena_;
101   BrotliTwoPassArena* two_pass_arena_;
102 
103   /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
104   uint32_t* command_buf_;
105   uint8_t* literal_buf_;
106 
107   uint8_t* next_out_;
108   size_t available_out_;
109   size_t total_out_;
110   /* Temporary buffer for padding flush bits or metadata block header / body. */
111   union {
112     uint64_t u64[2];
113     uint8_t u8[16];
114   } tiny_buf_;
115   uint32_t remaining_metadata_bytes_;
116   BrotliEncoderStreamState stream_state_;
117 
118   BROTLI_BOOL is_last_block_emitted_;
119   BROTLI_BOOL is_initialized_;
120 } BrotliEncoderStateStruct;
121 
122 static size_t InputBlockSize(BrotliEncoderState* s) {
123   return (size_t)1 << s->params.lgblock;
124 }
125 
126 static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
127   return s->input_pos_ - s->last_processed_pos_;
128 }
129 
130 static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
131   const uint64_t delta = UnprocessedInputSize(s);
132   size_t block_size = InputBlockSize(s);
133   if (delta >= block_size) return 0;
134   return block_size - (size_t)delta;
135 }
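/* Illustrative numbers (not from the original source): with
   params.lgblock == 16 the input block size is 65536 bytes, so after
   10000 unprocessed bytes RemainingInputBlockSize() returns 55536. */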
136 
137 BROTLI_BOOL BrotliEncoderSetParameter(
138     BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
139   /* Changing parameters on the fly is not implemented yet. */
140   if (state->is_initialized_) return BROTLI_FALSE;
141   /* TODO(eustas): Validate/clamp parameters here. */
142   switch (p) {
143     case BROTLI_PARAM_MODE:
144       state->params.mode = (BrotliEncoderMode)value;
145       return BROTLI_TRUE;
146 
147     case BROTLI_PARAM_QUALITY:
148       state->params.quality = (int)value;
149       return BROTLI_TRUE;
150 
151     case BROTLI_PARAM_LGWIN:
152       state->params.lgwin = (int)value;
153       return BROTLI_TRUE;
154 
155     case BROTLI_PARAM_LGBLOCK:
156       state->params.lgblock = (int)value;
157       return BROTLI_TRUE;
158 
159     case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
160       if ((value != 0) && (value != 1)) return BROTLI_FALSE;
161       state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
162       return BROTLI_TRUE;
163 
164     case BROTLI_PARAM_SIZE_HINT:
165       state->params.size_hint = value;
166       return BROTLI_TRUE;
167 
168     case BROTLI_PARAM_LARGE_WINDOW:
169       state->params.large_window = TO_BROTLI_BOOL(!!value);
170       return BROTLI_TRUE;
171 
172     case BROTLI_PARAM_NPOSTFIX:
173       state->params.dist.distance_postfix_bits = value;
174       return BROTLI_TRUE;
175 
176     case BROTLI_PARAM_NDIRECT:
177       state->params.dist.num_direct_distance_codes = value;
178       return BROTLI_TRUE;
179 
180     case BROTLI_PARAM_STREAM_OFFSET:
181       if (value > (1u << 30)) return BROTLI_FALSE;
182       state->params.stream_offset = value;
183       return BROTLI_TRUE;
184 
185     default: return BROTLI_FALSE;
186   }
187 }
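/* Hedged usage sketch (not part of the original file): typical parameter
   setup through the public API declared in <brotli/encode.h>; the quality,
   window and mode values below are arbitrary examples.

     BrotliEncoderState* enc = BrotliEncoderCreateInstance(NULL, NULL, NULL);
     if (enc) {
       BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, 9);
       BrotliEncoderSetParameter(enc, BROTLI_PARAM_LGWIN, 22);
       BrotliEncoderSetParameter(enc, BROTLI_PARAM_MODE, BROTLI_MODE_TEXT);
       BrotliEncoderDestroyInstance(enc);
     }

   Parameters must be set before the first compression call: once
   is_initialized_ is set, BrotliEncoderSetParameter returns BROTLI_FALSE. */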
188 
189 /* Wraps 64-bit input position to 32-bit ring-buffer position preserving
190    "not-a-first-lap" feature. */
191 static uint32_t WrapPosition(uint64_t position) {
192   uint32_t result = (uint32_t)position;
193   uint64_t gb = position >> 30;
194   if (gb > 2) {
195     /* Wrap every 2 GiB; the first 3 GiB are continuous. */
196     result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;
197   }
198   return result;
199 }
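/* Worked example (illustrative): positions below 3 GiB map to themselves.
   For position == 3.5 GiB, gb == 3, the low 30 bits equal 0.5 GiB and the
   top bits become ((3 - 1) & 1) + 1 == 1, so the result is 1.5 GiB.
   Wrapped positions thus alternate between the [1 GiB, 2 GiB) and
   [2 GiB, 3 GiB) bands and never return to [0, 1 GiB), preserving the
   "not-a-first-lap" property. */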
200 
201 static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
202   MemoryManager* m = &s->memory_manager_;
203   if (s->storage_size_ < size) {
204     BROTLI_FREE(m, s->storage_);
205     s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
206     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
207     s->storage_size_ = size;
208   }
209   return s->storage_;
210 }
211 
212 static size_t HashTableSize(size_t max_table_size, size_t input_size) {
213   size_t htsize = 256;
214   while (htsize < max_table_size && htsize < input_size) {
215     htsize <<= 1;
216   }
217   return htsize;
218 }
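/* Illustrative sizing: with max_table_size == 16384 and input_size == 5000
   the loop grows htsize 256 -> 512 -> ... -> 8192 and stops there, because
   8192 >= input_size. */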
219 
220 static int* GetHashTable(BrotliEncoderState* s, int quality,
221                          size_t input_size, size_t* table_size) {
222   /* Use smaller hash table when input.size() is smaller, since we
223      fill the table, incurring O(hash table size) overhead for
224      compression, and if the input is short, we won't need that
225      many hash table entries anyway. */
226   MemoryManager* m = &s->memory_manager_;
227   const size_t max_table_size = MaxHashTableSize(quality);
228   size_t htsize = HashTableSize(max_table_size, input_size);
229   int* table;
230   BROTLI_DCHECK(max_table_size >= 256);
231   if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
232     /* Only odd shifts are supported by fast-one-pass. */
233     if ((htsize & 0xAAAAA) == 0) {
234       htsize <<= 1;
235     }
236   }
237 
238   if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
239     table = s->small_table_;
240   } else {
241     if (htsize > s->large_table_size_) {
242       s->large_table_size_ = htsize;
243       BROTLI_FREE(m, s->large_table_);
244       s->large_table_ = BROTLI_ALLOC(m, int, htsize);
245       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
246     }
247     table = s->large_table_;
248   }
249 
250   *table_size = htsize;
251   memset(table, 0, htsize * sizeof(*table));
252   return table;
253 }
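/* Note on the odd-shift adjustment above (illustrative): 0xAAAAA has only
   odd bit positions set, so a power-of-two htsize such as 4096 (bit 12)
   gives (htsize & 0xAAAAA) == 0 and is doubled to 8192 (bit 13), whereas
   8192 itself is left unchanged. */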
254 
255 static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
256     uint16_t* last_bytes, uint8_t* last_bytes_bits) {
257   if (large_window) {
258     *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
259     *last_bytes_bits = 14;
260   } else {
261     if (lgwin == 16) {
262       *last_bytes = 0;
263       *last_bytes_bits = 1;
264     } else if (lgwin == 17) {
265       *last_bytes = 1;
266       *last_bytes_bits = 7;
267     } else if (lgwin > 17) {
268       *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
269       *last_bytes_bits = 4;
270     } else {
271       *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
272       *last_bytes_bits = 7;
273     }
274   }
275 }
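/* Worked example (illustrative, non-large-window): lgwin == 22 takes the
   "lgwin > 17" branch, so *last_bytes == ((22 - 17) << 1) | 1 == 0x0B and
   4 bits are emitted: a 1 bit followed by the value 5 in the next 3 bits. */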
276 
277 /* TODO(eustas): move to compress_fragment.c? */
278 /* Initializes the command and distance prefix codes for the first block. */
279 static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
280   static const uint8_t kDefaultCommandDepths[128] = {
281     0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
282     0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
283     7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
284     7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
285     5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
286     6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
287     4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
288     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
289   };
290   static const uint16_t kDefaultCommandBits[128] = {
291     0,   0,   8,   9,   3,  35,   7,   71,
292     39, 103,  23,  47, 175, 111, 239,   31,
293     0,   0,   0,   4,  12,   2,  10,    6,
294     13,  29,  11,  43,  27,  59,  87,   55,
295     15,  79, 319, 831, 191, 703, 447,  959,
296     0,  14,   1,  25,   5,  21,  19,   51,
297     119, 159,  95, 223, 479, 991,  63,  575,
298     127, 639, 383, 895, 255, 767, 511, 1023,
299     14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300     27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
301     2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
302     767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
303   };
304   static const uint8_t kDefaultCommandCode[] = {
305     0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
306     0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
307     0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
308     0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
309     0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
310   };
311   static const size_t kDefaultCommandCodeNumBits = 448;
312   COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
313   COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);
314 
315   /* Initialize the pre-compressed form of the command and distance prefix
316      codes. */
317   COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
318   s->cmd_code_numbits = kDefaultCommandCodeNumBits;
319 }
320 
321 /* Decide about the context map based on the ability of the previous byte's
322    UTF8 prefix to predict the next byte. The
323    prediction ability is calculated as Shannon entropy. Here we need
324    Shannon entropy instead of 'BitsEntropy' since the prefix will be
325    encoded with the remaining 6 bits of the following byte, and
326    BitsEntropy will assume that symbol to be stored alone using Huffman
327    coding. */
328 static void ChooseContextMap(int quality,
329                              uint32_t* bigram_histo,
330                              size_t* num_literal_contexts,
331                              const uint32_t** literal_context_map) {
332   static const uint32_t kStaticContextMapContinuation[64] = {
333     1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
337   };
338   static const uint32_t kStaticContextMapSimpleUTF8[64] = {
339     0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
341     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
342     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
343   };
344 
345   uint32_t monogram_histo[3] = { 0 };
346   uint32_t two_prefix_histo[6] = { 0 };
347   size_t total;
348   size_t i;
349   size_t dummy;
350   double entropy[4];
351   for (i = 0; i < 9; ++i) {
352     monogram_histo[i % 3] += bigram_histo[i];
353     two_prefix_histo[i % 6] += bigram_histo[i];
354   }
355   entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
356   entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
357                 ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
358   entropy[3] = 0;
359   for (i = 0; i < 3; ++i) {
360     entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
361   }
362 
363   total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
364   BROTLI_DCHECK(total != 0);
365   entropy[0] = 1.0 / (double)total;
366   entropy[1] *= entropy[0];
367   entropy[2] *= entropy[0];
368   entropy[3] *= entropy[0];
369 
370   if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
371     /* 3 context models is a bit slower, don't use it at lower qualities. */
372     entropy[3] = entropy[1] * 10;
373   }
374   /* If expected savings by symbol are less than 0.2 bits, skip the
375      context modeling -- in exchange for faster decoding speed. */
376   if (entropy[1] - entropy[2] < 0.2 &&
377       entropy[1] - entropy[3] < 0.2) {
378     *num_literal_contexts = 1;
379   } else if (entropy[2] - entropy[3] < 0.02) {
380     *num_literal_contexts = 2;
381     *literal_context_map = kStaticContextMapSimpleUTF8;
382   } else {
383     *num_literal_contexts = 3;
384     *literal_context_map = kStaticContextMapContinuation;
385   }
386 }
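/* Illustrative decision with hypothetical per-literal entropies: 4.0 bits
   without context, 3.9 bits for the 2-context map and 3.6 bits for the
   3-context map give entropy[1] - entropy[3] == 0.4 >= 0.2 and
   entropy[2] - entropy[3] == 0.3 >= 0.02, so kStaticContextMapContinuation
   with 3 contexts is selected. */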
387 
388 /* Decide if we want to use a more complex static context map containing 13
389    context values, based on the entropy reduction of histograms over the
390    first 5 bits of literals. */
391 static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
392     size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
393     size_t* num_literal_contexts, const uint32_t** literal_context_map,
394     uint32_t* arena) {
395   static const uint32_t kStaticContextMapComplexUTF8[64] = {
396     11, 11, 12, 12, /* 0 special */
397     0, 0, 0, 0, /* 4 lf */
398     1, 1, 9, 9, /* 8 space */
399     2, 2, 2, 2, /* !, first after space/lf and after something else. */
400     1, 1, 1, 1, /* " */
401     8, 3, 3, 3, /* % */
402     1, 1, 1, 1, /* ({[ */
403     2, 2, 2, 2, /* }]) */
404     8, 4, 4, 4, /* :; */
405     8, 7, 4, 4, /* . */
406     8, 0, 0, 0, /* > */
407     3, 3, 3, 3, /* [0..9] */
408     5, 5, 10, 5, /* [A-Z] */
409     5, 5, 10, 5,
410     6, 6, 6, 6, /* [a-z] */
411     6, 6, 6, 6,
412   };
413   BROTLI_UNUSED(quality);
414   /* Try the more complex static context map only for long data. */
415   if (size_hint < (1 << 20)) {
416     return BROTLI_FALSE;
417   } else {
418     const size_t end_pos = start_pos + length;
419     /* To make entropy calculations faster, we collect histograms
420        over the 5 most significant bits of literals. One histogram
421        without context and 13 additional histograms for each context value. */
422     uint32_t* BROTLI_RESTRICT const combined_histo = arena;
423     uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
424     uint32_t total = 0;
425     double entropy[3];
426     size_t dummy;
427     size_t i;
428     ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
429     memset(arena, 0, sizeof(arena[0]) * 32 * 14);
430     for (; start_pos + 64 <= end_pos; start_pos += 4096) {
431       const size_t stride_end_pos = start_pos + 64;
432       uint8_t prev2 = input[start_pos & mask];
433       uint8_t prev1 = input[(start_pos + 1) & mask];
434       size_t pos;
435       /* To make the analysis of the data faster we only examine 64 byte long
436          strides at 4 KiB intervals. */
437       for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
438         const uint8_t literal = input[pos & mask];
439         const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
440             BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
441         ++total;
442         ++combined_histo[literal >> 3];
443         ++context_histo[(context << 5) + (literal >> 3)];
444         prev2 = prev1;
445         prev1 = literal;
446       }
447     }
448     entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
449     entropy[2] = 0;
450     for (i = 0; i < 13; ++i) {
451       entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &dummy);
452     }
453     entropy[0] = 1.0 / (double)total;
454     entropy[1] *= entropy[0];
455     entropy[2] *= entropy[0];
456     /* The triggering heuristics below were tuned by compressing the individual
457        files of the silesia corpus. If we skip this kind of context modeling
458        for not very well compressible input (i.e. entropy using context modeling
459        exceeds 60% of maximal entropy) or if expected savings by symbol are less
460        than 0.2 bits, then in every case when it triggers, the final compression
461        ratio is improved. Note however that this heuristic might be too strict
462        for some cases and could be tuned further. */
463     if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
464       return BROTLI_FALSE;
465     } else {
466       *num_literal_contexts = 13;
467       *literal_context_map = kStaticContextMapComplexUTF8;
468       return BROTLI_TRUE;
469     }
470   }
471 }
472 
473 static void DecideOverLiteralContextModeling(const uint8_t* input,
474     size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
475     size_t* num_literal_contexts, const uint32_t** literal_context_map,
476     uint32_t* arena) {
477   if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
478     return;
479   } else if (ShouldUseComplexStaticContextMap(
480       input, start_pos, length, mask, quality, size_hint,
481       num_literal_contexts, literal_context_map, arena)) {
482     /* Context map was already set, nothing else to do. */
483   } else {
484     /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
485        UTF8 data faster we only examine 64 byte long strides at 4 KiB
486        intervals. */
487     const size_t end_pos = start_pos + length;
488     uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
489     memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
490     for (; start_pos + 64 <= end_pos; start_pos += 4096) {
491       static const int lut[4] = { 0, 0, 1, 2 };
492       const size_t stride_end_pos = start_pos + 64;
493       int prev = lut[input[start_pos & mask] >> 6] * 3;
494       size_t pos;
495       for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
496         const uint8_t literal = input[pos & mask];
497         ++bigram_prefix_histo[prev + lut[literal >> 6]];
498         prev = lut[literal >> 6] * 3;
499       }
500     }
501     ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
502                      literal_context_map);
503   }
504 }
505 
506 static BROTLI_BOOL ShouldCompress(
507     const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
508     const size_t bytes, const size_t num_literals, const size_t num_commands) {
509   /* TODO(eustas): find more precise minimal block overhead. */
510   if (bytes <= 2) return BROTLI_FALSE;
511   if (num_commands < (bytes >> 8) + 2) {
512     if ((double)num_literals > 0.99 * (double)bytes) {
513       uint32_t literal_histo[256] = { 0 };
514       static const uint32_t kSampleRate = 13;
515       static const double kMinEntropy = 7.92;
516       const double bit_cost_threshold =
517           (double)bytes * kMinEntropy / kSampleRate;
518       size_t t = (bytes + kSampleRate - 1) / kSampleRate;
519       uint32_t pos = (uint32_t)last_flush_pos;
520       size_t i;
521       for (i = 0; i < t; i++) {
522         ++literal_histo[data[pos & mask]];
523         pos += kSampleRate;
524       }
525       if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
526         return BROTLI_FALSE;
527       }
528     }
529   }
530   return BROTLI_TRUE;
531 }
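/* Illustrative numbers (hypothetical): for bytes == 8192 the entropy check
   only runs when num_commands < 34 and literals cover more than 99% of the
   data. It then samples every 13th byte (631 samples) and returns
   BROTLI_FALSE (i.e. emit the block uncompressed) if the sampled literal
   entropy exceeds 8192 * 7.92 / 13 ~= 4991 bits, meaning the literals look
   nearly incompressible. */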
532 
533 /* Chooses the literal context mode for a metablock */
534 static ContextType ChooseContextMode(const BrotliEncoderParams* params,
535     const uint8_t* data, const size_t pos, const size_t mask,
536     const size_t length) {
537   /* We only consider using something other than CONTEXT_UTF8 at the
538      highest qualities. */
539   if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
540       !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
541     return CONTEXT_SIGNED;
542   }
543   return CONTEXT_UTF8;
544 }
545 
546 static void WriteMetaBlockInternal(MemoryManager* m,
547                                    const uint8_t* data,
548                                    const size_t mask,
549                                    const uint64_t last_flush_pos,
550                                    const size_t bytes,
551                                    const BROTLI_BOOL is_last,
552                                    ContextType literal_context_mode,
553                                    const BrotliEncoderParams* params,
554                                    const uint8_t prev_byte,
555                                    const uint8_t prev_byte2,
556                                    const size_t num_literals,
557                                    const size_t num_commands,
558                                    Command* commands,
559                                    const int* saved_dist_cache,
560                                    int* dist_cache,
561                                    size_t* storage_ix,
562                                    uint8_t* storage) {
563   const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
564   uint16_t last_bytes;
565   uint8_t last_bytes_bits;
566   ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
567   BrotliEncoderParams block_params = *params;
568 
569   if (bytes == 0) {
570     /* Write the ISLAST and ISEMPTY bits. */
571     BrotliWriteBits(2, 3, storage_ix, storage);
572     *storage_ix = (*storage_ix + 7u) & ~7u;
573     return;
574   }
575 
576   if (!ShouldCompress(data, mask, last_flush_pos, bytes,
577                       num_literals, num_commands)) {
578     /* Restore the distance cache, as its last update by
579        CreateBackwardReferences is now unused. */
580     memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
581     BrotliStoreUncompressedMetaBlock(is_last, data,
582                                      wrapped_last_flush_pos, mask, bytes,
583                                      storage_ix, storage);
584     return;
585   }
586 
587   BROTLI_DCHECK(*storage_ix <= 14);
588   last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
589   last_bytes_bits = (uint8_t)(*storage_ix);
590   if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
591     BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
592                              bytes, mask, is_last, params,
593                              commands, num_commands,
594                              storage_ix, storage);
595     if (BROTLI_IS_OOM(m)) return;
596   } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
597     BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
598                                 bytes, mask, is_last, params,
599                                 commands, num_commands,
600                                 storage_ix, storage);
601     if (BROTLI_IS_OOM(m)) return;
602   } else {
603     MetaBlockSplit mb;
604     InitMetaBlockSplit(&mb);
605     if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
606       size_t num_literal_contexts = 1;
607       const uint32_t* literal_context_map = NULL;
608       if (!params->disable_literal_context_modeling) {
609         /* TODO(eustas): pull to higher level and reuse. */
610         uint32_t* arena = BROTLI_ALLOC(m, uint32_t, 14 * 32);
611         if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
612         DecideOverLiteralContextModeling(
613             data, wrapped_last_flush_pos, bytes, mask, params->quality,
614             params->size_hint, &num_literal_contexts,
615             &literal_context_map, arena);
616         BROTLI_FREE(m, arena);
617       }
618       BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
619           prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
620           literal_context_map, commands, num_commands, &mb);
621       if (BROTLI_IS_OOM(m)) return;
622     } else {
623       BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
624                            prev_byte, prev_byte2,
625                            commands, num_commands,
626                            literal_context_mode,
627                            &mb);
628       if (BROTLI_IS_OOM(m)) return;
629     }
630     if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
631       /* The number of distance symbols effectively used for distance
632          histograms. It might be less than distance alphabet size
633          for "Large Window Brotli" (32-bit). */
634       BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
635     }
636     BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
637                          prev_byte, prev_byte2,
638                          is_last,
639                          &block_params,
640                          literal_context_mode,
641                          commands, num_commands,
642                          &mb,
643                          storage_ix, storage);
644     if (BROTLI_IS_OOM(m)) return;
645     DestroyMetaBlockSplit(m, &mb);
646   }
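  /* Fall back to an uncompressed meta-block if the compressed form came out
     more than 4 bytes larger than the raw input. */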
647   if (bytes + 4 < (*storage_ix >> 3)) {
648     /* Restore the distance cache and last byte. */
649     memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
650     storage[0] = (uint8_t)last_bytes;
651     storage[1] = (uint8_t)(last_bytes >> 8);
652     *storage_ix = last_bytes_bits;
653     BrotliStoreUncompressedMetaBlock(is_last, data,
654                                      wrapped_last_flush_pos, mask,
655                                      bytes, storage_ix, storage);
656   }
657 }
658 
659 static void ChooseDistanceParams(BrotliEncoderParams* params) {
660   uint32_t distance_postfix_bits = 0;
661   uint32_t num_direct_distance_codes = 0;
662 
663   if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
664     uint32_t ndirect_msb;
665     if (params->mode == BROTLI_MODE_FONT) {
666       distance_postfix_bits = 1;
667       num_direct_distance_codes = 12;
668     } else {
669       distance_postfix_bits = params->dist.distance_postfix_bits;
670       num_direct_distance_codes = params->dist.num_direct_distance_codes;
671     }
672     ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
673     if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
674         num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
675         (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {
676       distance_postfix_bits = 0;
677       num_direct_distance_codes = 0;
678     }
679   }
680 
681   BrotliInitDistanceParams(&params->dist, distance_postfix_bits,
682                            num_direct_distance_codes, params->large_window);
683 }
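/* Illustrative validation (hypothetical values): distance_postfix_bits == 2
   with num_direct_distance_codes == 8 is kept, since
   ndirect_msb == (8 >> 2) & 0x0F == 2 and (2 << 2) == 8; a value of 10 would
   fail the round-trip check ((2 << 2) != 10) and both parameters would be
   reset to zero. */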
684 
685 static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
686   MemoryManager* m = &s->memory_manager_;
687   if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
688   if (s->is_initialized_) return BROTLI_TRUE;
689 
690   s->last_bytes_bits_ = 0;
691   s->last_bytes_ = 0;
692   s->flint_ = BROTLI_FLINT_DONE;
693   s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
694 
695   SanitizeParams(&s->params);
696   s->params.lgblock = ComputeLgBlock(&s->params);
697   ChooseDistanceParams(&s->params);
698 
699   if (s->params.stream_offset != 0) {
700     s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
701     /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
702     s->dist_cache_[0] = -16;
703     s->dist_cache_[1] = -16;
704     s->dist_cache_[2] = -16;
705     s->dist_cache_[3] = -16;
706     memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
707   }
708 
709   RingBufferSetup(&s->params, &s->ringbuffer_);
710 
711   /* Initialize last byte with stream header. */
712   {
713     int lgwin = s->params.lgwin;
714     if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
715         s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
716       lgwin = BROTLI_MAX(int, lgwin, 18);
717     }
718     if (s->params.stream_offset == 0) {
719       EncodeWindowBits(lgwin, s->params.large_window,
720                        &s->last_bytes_, &s->last_bytes_bits_);
721     } else {
722       /* Bigger values have the same effect, but could cause overflows. */
723       s->params.stream_offset = BROTLI_MIN(size_t,
724           s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
725     }
726   }
727 
728   if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
729     s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
730     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
731     InitCommandPrefixCodes(s->one_pass_arena_);
732   } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
733     s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
734     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
735   }
736 
737   s->is_initialized_ = BROTLI_TRUE;
738   return BROTLI_TRUE;
739 }
740 
741 static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
742   params->mode = BROTLI_DEFAULT_MODE;
743   params->large_window = BROTLI_FALSE;
744   params->quality = BROTLI_DEFAULT_QUALITY;
745   params->lgwin = BROTLI_DEFAULT_WINDOW;
746   params->lgblock = 0;
747   params->stream_offset = 0;
748   params->size_hint = 0;
749   params->disable_literal_context_modeling = BROTLI_FALSE;
750   BrotliInitSharedEncoderDictionary(&params->dictionary);
751   params->dist.distance_postfix_bits = 0;
752   params->dist.num_direct_distance_codes = 0;
753   params->dist.alphabet_size_max =
754       BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
755   params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
756   params->dist.max_distance = BROTLI_MAX_DISTANCE;
757 }
758 
759 static void BrotliEncoderCleanupParams(MemoryManager* m,
760     BrotliEncoderParams* params) {
761   BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
762 }
763 
764 static void BrotliEncoderInitState(BrotliEncoderState* s) {
765   BrotliEncoderInitParams(&s->params);
766   s->input_pos_ = 0;
767   s->num_commands_ = 0;
768   s->num_literals_ = 0;
769   s->last_insert_len_ = 0;
770   s->last_flush_pos_ = 0;
771   s->last_processed_pos_ = 0;
772   s->prev_byte_ = 0;
773   s->prev_byte2_ = 0;
774   s->storage_size_ = 0;
775   s->storage_ = 0;
776   HasherInit(&s->hasher_);
777   s->large_table_ = NULL;
778   s->large_table_size_ = 0;
779   s->one_pass_arena_ = NULL;
780   s->two_pass_arena_ = NULL;
781   s->command_buf_ = NULL;
782   s->literal_buf_ = NULL;
783   s->next_out_ = NULL;
784   s->available_out_ = 0;
785   s->total_out_ = 0;
786   s->stream_state_ = BROTLI_STREAM_PROCESSING;
787   s->is_last_block_emitted_ = BROTLI_FALSE;
788   s->is_initialized_ = BROTLI_FALSE;
789 
790   RingBufferInit(&s->ringbuffer_);
791 
792   s->commands_ = 0;
793   s->cmd_alloc_size_ = 0;
794 
795   /* Initialize distance cache. */
796   s->dist_cache_[0] = 4;
797   s->dist_cache_[1] = 11;
798   s->dist_cache_[2] = 15;
799   s->dist_cache_[3] = 16;
800   /* Save the state of the distance cache in case we need to restore it for
801      emitting an uncompressed block. */
802   memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
803 }
804 
805 BrotliEncoderState* BrotliEncoderCreateInstance(
806     brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
807   BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc(
808       sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
809   if (state == NULL) {
810     /* BROTLI_DUMP(); */
811     return 0;
812   }
813   BrotliInitMemoryManager(
814       &state->memory_manager_, alloc_func, free_func, opaque);
815   BrotliEncoderInitState(state);
816   return state;
817 }
818 
819 static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
820   MemoryManager* m = &s->memory_manager_;
821   if (BROTLI_IS_OOM(m)) {
822     BrotliWipeOutMemoryManager(m);
823     return;
824   }
825   BROTLI_FREE(m, s->storage_);
826   BROTLI_FREE(m, s->commands_);
827   RingBufferFree(m, &s->ringbuffer_);
828   DestroyHasher(m, &s->hasher_);
829   BROTLI_FREE(m, s->large_table_);
830   BROTLI_FREE(m, s->one_pass_arena_);
831   BROTLI_FREE(m, s->two_pass_arena_);
832   BROTLI_FREE(m, s->command_buf_);
833   BROTLI_FREE(m, s->literal_buf_);
834   BrotliEncoderCleanupParams(m, &s->params);
835 }
836 
837 /* Deinitializes and frees BrotliEncoderState instance. */
838 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
839   if (!state) {
840     return;
841   } else {
842     BrotliEncoderCleanupState(state);
843     BrotliBootstrapFree(state, &state->memory_manager_);
844   }
845 }
846 
847 /*
848    Copies the given input data to the internal ring buffer of the compressor.
849    No processing of the data occurs at this time and this function can be
850    called multiple times before calling WriteBrotliData() to process the
851    accumulated input. At most input_block_size() bytes of input data can be
852    copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
853  */
854 static void CopyInputToRingBuffer(BrotliEncoderState* s,
855                                   const size_t input_size,
856                                   const uint8_t* input_buffer) {
857   RingBuffer* ringbuffer_ = &s->ringbuffer_;
858   MemoryManager* m = &s->memory_manager_;
859   RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
860   if (BROTLI_IS_OOM(m)) return;
861   s->input_pos_ += input_size;
862 
863   /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
864      hashing not depend on uninitialized data. This makes compression
865      deterministic and it prevents uninitialized memory warnings in Valgrind.
866      Even without erasing, the output would be valid (but nondeterministic).
867 
868      Background information: The compressor stores short (at most 8 bytes)
869      substrings of the input already read in a hash table, and detects
870      repetitions by looking up such substrings in the hash table. If it
871      can find a substring, it checks whether the substring is really there
872      in the ring buffer (or it's just a hash collision). Should the hash
873      table become corrupt, this check makes sure that the output is
874      still valid, albeit the compression ratio would be bad.
875 
876      The compressor populates the hash table from the ring buffer as it's
877      reading new bytes from the input. However, at the last few indexes of
878      the ring buffer, there are not enough bytes to build full-length
879      substrings from. Since the hash table always contains full-length
880      substrings, we erase with dummy zeros here to make sure that those
881      substrings will contain zeros at the end instead of uninitialized
882      data.
883 
884      Please note that erasing is not necessary (because the
885      memory region is already initialized since the ring buffer
886      has a `tail' that holds a copy of the beginning), so we
887      skip erasing if we have already gone around at least once in
888      the ring buffer.
889 
890      Only clear during the first round of ring-buffer writes. On
891      subsequent rounds, clearing would overwrite valid ring-buffer data. */
892   if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
893     /* This is the first time when the ring buffer is being written.
894        We clear 7 bytes just after the bytes that have been copied from
895        the input buffer.
896 
897        The ring-buffer has a "tail" that holds a copy of the beginning,
898        but only once the ring buffer has been fully written once, i.e.,
899        pos <= mask. For the first time, we need to write values
900        in this tail (where index may be larger than mask), so that
901        we have exactly defined behavior and don't read uninitialized
902        memory. Due to performance reasons, hashing reads data using a
903        LOAD64, which can go 7 bytes beyond the bytes written in the
904        ring-buffer. */
905     memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
906   }
907 }
908 
909 /* Marks all input as processed.
910    Returns true if position wrapping occurs. */
911 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
912   uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
913   uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
914   s->last_processed_pos_ = s->input_pos_;
915   return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
916 }
917 
918 static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
919                               uint32_t* wrapped_last_processed_pos) {
920   Command* last_command = &s->commands_[s->num_commands_ - 1];
921   const uint8_t* data = s->ringbuffer_.buffer_;
922   const uint32_t mask = s->ringbuffer_.mask_;
923   uint64_t max_backward_distance =
924       (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
925   uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
926   uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
927   uint64_t max_distance = last_processed_pos < max_backward_distance ?
928       last_processed_pos : max_backward_distance;
929   uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
930   uint32_t distance_code = CommandRestoreDistanceCode(last_command,
931                                                       &s->params.dist);
932   const CompoundDictionary* dict = &s->params.dictionary.compound;
933   size_t compound_dictionary_size = dict->total_size;
934   if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
935       distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
936     if (cmd_dist <= max_distance) {
937       while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
938              data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
939         last_command->copy_len_++;
940         (*bytes)--;
941         (*wrapped_last_processed_pos)++;
942       }
943     } else {
944       if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
945           last_copy_len < cmd_dist - max_distance) {
946         size_t address =
947             compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
948             (size_t)last_copy_len;
949         size_t br_index = 0;
950         size_t br_offset;
951         const uint8_t* chunk;
952         size_t chunk_length;
953         while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
954         br_offset = address - dict->chunk_offsets[br_index];
955         chunk = dict->chunk_source[br_index];
956         chunk_length =
957             dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
958         while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
959                chunk[br_offset]) {
960           last_command->copy_len_++;
961           (*bytes)--;
962           (*wrapped_last_processed_pos)++;
963           if (++br_offset == chunk_length) {
964             br_index++;
965             br_offset = 0;
966             if (br_index != dict->num_chunks) {
967               chunk = dict->chunk_source[br_index];
968               chunk_length = dict->chunk_offsets[br_index + 1] -
969                   dict->chunk_offsets[br_index];
970             } else {
971               break;
972             }
973           }
974         }
975       }
976     }
977     /* The copy length is at most the metablock size, and thus expressible. */
978     GetLengthCode(last_command->insert_len_,
979                   (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
980                            (int)(last_command->copy_len_ >> 25)),
981                   TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
982                   &last_command->cmd_prefix_);
983   }
984 }
985 
986 /*
987    Processes the accumulated input data and sets |*out_size| to the length of
988    the new output meta-block, or to zero if no new output meta-block has been
989    created (in this case the processed input data is buffered internally).
990    If |*out_size| is positive, |*output| points to the start of the output
991    data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
992    always created. However, until |is_last| is BROTLI_TRUE the encoder may
993    retain up to 7 bits of the last byte of output. To force the encoder to dump
994    the remaining bits, use WriteMetadata() to append an empty meta-data block.
995    Returns BROTLI_FALSE if the size of the input data is larger than
996    input_block_size().
997  */
998 static BROTLI_BOOL EncodeData(
999     BrotliEncoderState* s, const BROTLI_BOOL is_last,
1000     const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
1001   const uint64_t delta = UnprocessedInputSize(s);
1002   uint32_t bytes = (uint32_t)delta;
1003   uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
1004   uint8_t* data;
1005   uint32_t mask;
1006   MemoryManager* m = &s->memory_manager_;
1007   ContextType literal_context_mode;
1008   ContextLut literal_context_lut;
1009 
1010   data = s->ringbuffer_.buffer_;
1011   mask = s->ringbuffer_.mask_;
1012 
1013   if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
1014   /* Adding more blocks after "last" block is forbidden. */
1015   if (s->is_last_block_emitted_) return BROTLI_FALSE;
1016   if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
1017 
1018   if (delta > InputBlockSize(s)) {
1019     return BROTLI_FALSE;
1020   }
1021   if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
1022       !s->command_buf_) {
1023     s->command_buf_ =
1024         BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
1025     s->literal_buf_ =
1026         BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
1027     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
1028         BROTLI_IS_NULL(s->literal_buf_)) {
1029       return BROTLI_FALSE;
1030     }
1031   }
1032 
1033   if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
1034       s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1035     uint8_t* storage;
1036     size_t storage_ix = s->last_bytes_bits_;
1037     size_t table_size;
1038     int* table;
1039 
1040     if (delta == 0 && !is_last) {
1041       /* We have no new input data and we don't have to finish the stream, so
1042          nothing to do. */
1043       *out_size = 0;
1044       return BROTLI_TRUE;
1045     }
1046     storage = GetBrotliStorage(s, 2 * bytes + 503);
1047     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1048     storage[0] = (uint8_t)s->last_bytes_;
1049     storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1050     table = GetHashTable(s, s->params.quality, bytes, &table_size);
1051     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1052     if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
1053       BrotliCompressFragmentFast(
1054           s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
1055           bytes, is_last,
1056           table, table_size,
1057           &storage_ix, storage);
1058       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1059     } else {
1060       BrotliCompressFragmentTwoPass(
1061           s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
1062           bytes, is_last,
1063           s->command_buf_, s->literal_buf_,
1064           table, table_size,
1065           &storage_ix, storage);
1066       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1067     }
1068     s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1069     s->last_bytes_bits_ = storage_ix & 7u;
1070     UpdateLastProcessedPos(s);
1071     *output = &storage[0];
1072     *out_size = storage_ix >> 3;
1073     return BROTLI_TRUE;
1074   }
1075 
1076   {
1077     /* Theoretical max number of commands is 1 per 2 bytes. */
1078     size_t newsize = s->num_commands_ + bytes / 2 + 1;
1079     if (newsize > s->cmd_alloc_size_) {
1080       Command* new_commands;
1081       /* Reserve a bit more memory to allow merging with a next block
1082          without reallocation: that would impact speed. */
1083       newsize += (bytes / 4) + 16;
1084       s->cmd_alloc_size_ = newsize;
1085       new_commands = BROTLI_ALLOC(m, Command, newsize);
1086       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
1087       if (s->commands_) {
1088         memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
1089         BROTLI_FREE(m, s->commands_);
1090       }
1091       s->commands_ = new_commands;
1092     }
1093   }
1094 
1095   InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
1096       wrapped_last_processed_pos, bytes, is_last);
1097 
1098   literal_context_mode = ChooseContextMode(
1099       &s->params, data, WrapPosition(s->last_flush_pos_),
1100       mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
1101   literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1102 
1103   if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1104 
1105   if (s->num_commands_ && s->last_insert_len_ == 0) {
1106     ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
1107   }
1108 
1109   if (s->params.quality == ZOPFLIFICATION_QUALITY) {
1110     BROTLI_DCHECK(s->params.hasher.type == 10);
1111     BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1112         data, mask, literal_context_lut, &s->params,
1113         &s->hasher_, s->dist_cache_,
1114         &s->last_insert_len_, &s->commands_[s->num_commands_],
1115         &s->num_commands_, &s->num_literals_);
1116     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1117   } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
1118     BROTLI_DCHECK(s->params.hasher.type == 10);
1119     BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1120         data, mask, literal_context_lut, &s->params,
1121         &s->hasher_, s->dist_cache_,
1122         &s->last_insert_len_, &s->commands_[s->num_commands_],
1123         &s->num_commands_, &s->num_literals_);
1124     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1125   } else {
1126     BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
1127         data, mask, literal_context_lut, &s->params,
1128         &s->hasher_, s->dist_cache_,
1129         &s->last_insert_len_, &s->commands_[s->num_commands_],
1130         &s->num_commands_, &s->num_literals_);
1131   }
1132 
1133   {
1134     const size_t max_length = MaxMetablockSize(&s->params);
1135     const size_t max_literals = max_length / 8;
1136     const size_t max_commands = max_length / 8;
1137     const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
1138     /* If maximal possible additional block doesn't fit metablock, flush now. */
1139     /* TODO(eustas): Postpone decision until next block arrives? */
1140     const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
1141         processed_bytes + InputBlockSize(s) <= max_length);
1142     /* If block splitting is not used, then flush as soon as there is some
1143        amount of commands / literals produced. */
1144     const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
1145         s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
1146         s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
1147     if (!is_last && !force_flush && !should_flush &&
1148         next_input_fits_metablock &&
1149         s->num_literals_ < max_literals &&
1150         s->num_commands_ < max_commands) {
1151       /* Merge with next input block. Everything will happen later. */
1152       if (UpdateLastProcessedPos(s)) {
1153         HasherReset(&s->hasher_);
1154       }
1155       *out_size = 0;
1156       return BROTLI_TRUE;
1157     }
1158   }
1159 
1160   /* Create the last insert-only command. */
1161   if (s->last_insert_len_ > 0) {
1162     InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
1163     s->num_literals_ += s->last_insert_len_;
1164     s->last_insert_len_ = 0;
1165   }
1166 
1167   if (!is_last && s->input_pos_ == s->last_flush_pos_) {
1168     /* We have no new input data and we don't have to finish the stream, so
1169        nothing to do. */
1170     *out_size = 0;
1171     return BROTLI_TRUE;
1172   }
1173   BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
1174   BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
1175   BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
1176   {
1177     const uint32_t metablock_size =
1178         (uint32_t)(s->input_pos_ - s->last_flush_pos_);
1179     uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
1180     size_t storage_ix = s->last_bytes_bits_;
1181     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1182     storage[0] = (uint8_t)s->last_bytes_;
1183     storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1184     WriteMetaBlockInternal(
1185         m, data, mask, s->last_flush_pos_, metablock_size, is_last,
1186         literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
1187         s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
1188         s->dist_cache_, &storage_ix, storage);
1189     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1190     s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1191     s->last_bytes_bits_ = storage_ix & 7u;
1192     s->last_flush_pos_ = s->input_pos_;
1193     if (UpdateLastProcessedPos(s)) {
1194       HasherReset(&s->hasher_);
1195     }
1196     if (s->last_flush_pos_ > 0) {
1197       s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
1198     }
1199     if (s->last_flush_pos_ > 1) {
1200       s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
1201     }
1202     s->num_commands_ = 0;
1203     s->num_literals_ = 0;
1204     /* Save the state of the distance cache in case we need to restore it for
1205        emitting an uncompressed block. */
1206     memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
1207     *output = &storage[0];
1208     *out_size = storage_ix >> 3;
1209     return BROTLI_TRUE;
1210   }
1211 }
1212 
1213 /* Dumps remaining output bits and metadata header to |header|.
1214    Returns number of produced bytes.
1215    REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
1216    REQUIRED: |block_size| <= (1 << 24). */
1217 static size_t WriteMetadataHeader(
1218     BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
1219   size_t storage_ix;
1220   storage_ix = s->last_bytes_bits_;
1221   header[0] = (uint8_t)s->last_bytes_;
1222   header[1] = (uint8_t)(s->last_bytes_ >> 8);
1223   s->last_bytes_ = 0;
1224   s->last_bytes_bits_ = 0;
1225 
1226   BrotliWriteBits(1, 0, &storage_ix, header);
1227   BrotliWriteBits(2, 3, &storage_ix, header);
1228   BrotliWriteBits(1, 0, &storage_ix, header);
1229   if (block_size == 0) {
1230     BrotliWriteBits(2, 0, &storage_ix, header);
1231   } else {
1232     uint32_t nbits = (block_size == 1) ? 0 :
1233         (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
1234     uint32_t nbytes = (nbits + 7) / 8;
1235     BrotliWriteBits(2, nbytes, &storage_ix, header);
1236     BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
1237   }
1238   return (storage_ix + 7u) >> 3;
1239 }
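/* Worked example (illustrative, assuming no carried-over bits): for
   block_size == 100, nbits == Log2FloorNonZero(99) + 1 == 7, hence
   nbytes == 1 and the header takes 1 + 2 + 1 + 2 + 8 == 14 bits, so the
   function returns 2. */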
1240 
1241 static BROTLI_NOINLINE BROTLI_BOOL BrotliCompressBufferQuality10(
1242     int lgwin, size_t input_size, const uint8_t* input_buffer,
1243     size_t* encoded_size, uint8_t* encoded_buffer) {
1244   MemoryManager* m =
1245       (MemoryManager*)BrotliBootstrapAlloc(sizeof(MemoryManager), 0, 0, 0);
1246 
1247   const size_t mask = BROTLI_SIZE_MAX >> 1;
1248   int dist_cache[4] = { 4, 11, 15, 16 };
1249   int saved_dist_cache[4] = { 4, 11, 15, 16 };
1250   BROTLI_BOOL ok = BROTLI_TRUE;
1251   const size_t max_out_size = *encoded_size;
1252   size_t total_out_size = 0;
1253   uint16_t last_bytes;
1254   uint8_t last_bytes_bits;
1255 
1256   const size_t hasher_eff_size = BROTLI_MIN(size_t,
1257       input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
1258 
1259   const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
1260   size_t max_block_size;
1261   const size_t max_metablock_size = (size_t)1 << lgmetablock;
1262   const size_t max_literals_per_metablock = max_metablock_size / 8;
1263   const size_t max_commands_per_metablock = max_metablock_size / 8;
1264   size_t metablock_start = 0;
1265   uint8_t prev_byte = 0;
1266   uint8_t prev_byte2 = 0;
1267 
1268   BrotliEncoderParams* params = NULL;
1269   Hasher* hasher = NULL;
1270 
1271   if (m == NULL) return BROTLI_FALSE;
1272   BrotliInitMemoryManager(m, 0, 0, 0);
1273   params = BROTLI_ALLOC(m, BrotliEncoderParams, 2);
1274   hasher = BROTLI_ALLOC(m, Hasher, 1);
1275   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(params) || BROTLI_IS_NULL(hasher)) {
1276     goto oom;
1277   }
1278   BrotliEncoderInitParams(params);
1279   HasherInit(hasher);
1280 
1281   params->quality = 10;
1282   params->lgwin = lgwin;
1283   if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1284     params->large_window = BROTLI_TRUE;
1285   }
1286   SanitizeParams(params);
1287   params->lgblock = ComputeLgBlock(params);
1288   ChooseDistanceParams(params);
1289   max_block_size = (size_t)1 << params->lgblock;
1290 
1291   /* Since default static dictionary is used we assume that
1292    * params->quality < params->dictionary.max_quality. */
1293 
1294   BROTLI_DCHECK(input_size <= mask + 1);
1295   EncodeWindowBits(lgwin, params->large_window, &last_bytes, &last_bytes_bits);
1296   InitOrStitchToPreviousBlock(m, hasher, input_buffer, mask, params,
1297       0, hasher_eff_size, BROTLI_TRUE);
1298   if (BROTLI_IS_OOM(m)) goto oom;
1299 
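  /* Main quality-10 loop: each iteration produces one meta-block. The inner
     loop below runs the Zopfli shortest-path search over blocks of at most
     max_block_size bytes and converts the resulting paths into commands,
     stopping early once the literal/command budget for the meta-block is
     exhausted. */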
1300   while (ok && metablock_start < input_size) {
1301     const size_t metablock_end =
1302         BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
1303     const size_t expected_num_commands =
1304         (metablock_end - metablock_start) / 12 + 16;
1305     Command* commands = 0;
1306     size_t num_commands = 0;
1307     size_t last_insert_len = 0;
1308     size_t num_literals = 0;
1309     size_t metablock_size = 0;
1310     size_t cmd_alloc_size = 0;
1311     BROTLI_BOOL is_last;
1312     uint8_t* storage;
1313     size_t storage_ix;
1314 
1315     ContextType literal_context_mode = ChooseContextMode(params,
1316         input_buffer, metablock_start, mask, metablock_end - metablock_start);
1317     ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1318 
1319     size_t block_start;
1320     for (block_start = metablock_start; block_start < metablock_end; ) {
1321       size_t block_size =
1322           BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
1323       ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
1324       size_t path_size;
1325       size_t new_cmd_alloc_size;
1326       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
1327       BrotliInitZopfliNodes(nodes, block_size + 1);
1328       StitchToPreviousBlockH10(&hasher->privat._H10, block_size, block_start,
1329                                input_buffer, mask);
1330       path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
1331           input_buffer, mask, literal_context_lut, params, dist_cache, hasher,
1332           nodes);
1333       if (BROTLI_IS_OOM(m)) goto oom;
1334       /* We allocate a command buffer in the first iteration of this loop that
1335          will likely be big enough for the whole metablock, so that for most
1336          inputs we will not have to reallocate in later iterations. We do the
1337          allocation here and not before the loop, because if the input is small,
1338          this will be allocated after the Zopfli cost model is freed, so this
1339          will not increase peak memory usage.
1340          TODO(eustas): If the first allocation is too small, increase command
1341          buffer size exponentially. */
1342       new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
1343                                       num_commands + path_size + 1);
1344       if (cmd_alloc_size != new_cmd_alloc_size) {
1345         Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
1346         if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom;
1347         cmd_alloc_size = new_cmd_alloc_size;
1348         if (commands) {
1349           memcpy(new_commands, commands, sizeof(Command) * num_commands);
1350           BROTLI_FREE(m, commands);
1351         }
1352         commands = new_commands;
1353       }
1354       BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
1355           &last_insert_len, params, &commands[num_commands], &num_literals);
1356       num_commands += path_size;
1357       block_start += block_size;
1358       metablock_size += block_size;
1359       BROTLI_FREE(m, nodes);
1360       if (num_literals > max_literals_per_metablock ||
1361           num_commands > max_commands_per_metablock) {
1362         break;
1363       }
1364     }
1365 
1366     if (last_insert_len > 0) {
1367       InitInsertCommand(&commands[num_commands++], last_insert_len);
1368       num_literals += last_insert_len;
1369     }
1370 
1371     is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
1372     storage = NULL;
1373     storage_ix = last_bytes_bits;
1374 
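    /* Three ways to emit this meta-block: (a) empty (only the ISLAST/ISEMPTY
       bits), (b) uncompressed when the heuristic says compression will not
       pay off, or (c) compressed, with a fallback to the uncompressed form
       if the compressed output turns out to exceed the input by more than
       4 bytes. */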
1375     if (metablock_size == 0) {
1376       /* Write the ISLAST and ISEMPTY bits. */
1377       storage = BROTLI_ALLOC(m, uint8_t, 16);
1378       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1379       storage[0] = (uint8_t)last_bytes;
1380       storage[1] = (uint8_t)(last_bytes >> 8);
1381       BrotliWriteBits(2, 3, &storage_ix, storage);
1382       storage_ix = (storage_ix + 7u) & ~7u;
1383     } else if (!ShouldCompress(input_buffer, mask, metablock_start,
1384                                metablock_size, num_literals, num_commands)) {
1385       /* Restore the distance cache, as its last update by
1386          CreateBackwardReferences is now unused. */
1387       memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1388       storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
1389       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1390       storage[0] = (uint8_t)last_bytes;
1391       storage[1] = (uint8_t)(last_bytes >> 8);
1392       BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1393                                        metablock_start, mask, metablock_size,
1394                                        &storage_ix, storage);
1395     } else {
1396       MetaBlockSplit mb;
1397       BrotliEncoderParams* block_params = params + 1;
1398       *block_params = *params;  /* shallow copy */
1399       InitMetaBlockSplit(&mb);
1400       BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
1401                            block_params,
1402                            prev_byte, prev_byte2,
1403                            commands, num_commands,
1404                            literal_context_mode,
1405                            &mb);
1406       if (BROTLI_IS_OOM(m)) goto oom;
1407       {
1408         /* The number of distance symbols effectively used for distance
1409            histograms. It might be less than distance alphabet size
1410            for "Large Window Brotli" (32-bit). */
1411         BrotliOptimizeHistograms(block_params->dist.alphabet_size_limit, &mb);
1412       }
1413       storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
1414       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1415       storage[0] = (uint8_t)last_bytes;
1416       storage[1] = (uint8_t)(last_bytes >> 8);
1417       BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
1418                            mask, prev_byte, prev_byte2,
1419                            is_last,
1420                            block_params,
1421                            literal_context_mode,
1422                            commands, num_commands,
1423                            &mb,
1424                            &storage_ix, storage);
1425       if (BROTLI_IS_OOM(m)) goto oom;
1426       if (metablock_size + 4 < (storage_ix >> 3)) {
1427         /* Restore the distance cache and last byte. */
1428         memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1429         storage[0] = (uint8_t)last_bytes;
1430         storage[1] = (uint8_t)(last_bytes >> 8);
1431         storage_ix = last_bytes_bits;
1432         BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1433                                          metablock_start, mask,
1434                                          metablock_size, &storage_ix, storage);
1435       }
1436       DestroyMetaBlockSplit(m, &mb);
1437     }
1438     last_bytes = (uint16_t)(storage[storage_ix >> 3]);
1439     last_bytes_bits = storage_ix & 7u;
1440     metablock_start += metablock_size;
1441     if (metablock_start < input_size) {
1442       prev_byte = input_buffer[metablock_start - 1];
1443       prev_byte2 = input_buffer[metablock_start - 2];
1444     }
1445     /* Save the state of the distance cache in case we need to restore it for
1446        emitting an uncompressed block. */
1447     memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
1448 
1449     {
1450       const size_t out_size = storage_ix >> 3;
1451       total_out_size += out_size;
1452       if (total_out_size <= max_out_size) {
1453         memcpy(encoded_buffer, storage, out_size);
1454         encoded_buffer += out_size;
1455       } else {
1456         ok = BROTLI_FALSE;
1457       }
1458     }
1459     BROTLI_FREE(m, storage);
1460     BROTLI_FREE(m, commands);
1461   }
1462 
1463   *encoded_size = total_out_size;
1464   DestroyHasher(m, hasher);
1465   BROTLI_FREE(m, hasher);
1466   BrotliEncoderCleanupParams(m, params);
1467   BROTLI_FREE(m, params);
1468   BrotliBootstrapFree(m, m);
1469   return ok;
1470 
1471 oom:
1472   BrotliWipeOutMemoryManager(m);
1473   BrotliBootstrapFree(m, m);
1474   return BROTLI_FALSE;
1475 }
1476 
1477 size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
1478   /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
1479   size_t num_large_blocks = input_size >> 14;
1480   size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
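  /* Illustrative arithmetic (hypothetical input): for input_size = 1 MiB,
     num_large_blocks = 1048576 >> 14 = 64, so
     overhead = 2 + 4 * 64 + 3 + 1 = 262 and the bound is 1048838 bytes. */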
1481   size_t result = input_size + overhead;
1482   if (input_size == 0) return 2;
1483   return (result < input_size) ? 0 : result;
1484 }
1485 
1486 /* Wraps data into an uncompressed brotli stream with minimal window size.
1487    |output| should point at a region with at least
1488    BrotliEncoderMaxCompressedSize addressable bytes.
1489    Returns the length of the stream. */
1490 static size_t MakeUncompressedStream(
1491     const uint8_t* input, size_t input_size, uint8_t* output) {
1492   size_t size = input_size;
1493   size_t result = 0;
1494   size_t offset = 0;
1495   if (input_size == 0) {
1496     output[0] = 6;
1497     return 1;
1498   }
1499   output[result++] = 0x21;  /* window bits = 10, is_last = false */
1500   output[result++] = 0x03;  /* empty metadata, padding */
1501   while (size > 0) {
1502     uint32_t nibbles = 0;
1503     uint32_t chunk_size;
1504     uint32_t bits;
1505     chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
1506     if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
1507     bits =
1508         (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
1509     output[result++] = (uint8_t)bits;
1510     output[result++] = (uint8_t)(bits >> 8);
1511     output[result++] = (uint8_t)(bits >> 16);
1512     if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
1513     memcpy(&output[result], &input[offset], chunk_size);
1514     result += chunk_size;
1515     offset += chunk_size;
1516     size -= chunk_size;
1517   }
1518   output[result++] = 3;
1519   return result;
1520 }
1521 
1522 BROTLI_BOOL BrotliEncoderCompress(
1523     int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
1524     const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
1525     size_t* encoded_size,
1526     uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
1527   BrotliEncoderState* s;
1528   size_t out_size = *encoded_size;
1529   const uint8_t* input_start = input_buffer;
1530   uint8_t* output_start = encoded_buffer;
1531   size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
1532   if (out_size == 0) {
1533     /* Output buffer needs at least one byte. */
1534     return BROTLI_FALSE;
1535   }
1536   if (input_size == 0) {
1537     /* Handle the special case of empty input. */
1538     *encoded_size = 1;
1539     *encoded_buffer = 6;
1540     return BROTLI_TRUE;
1541   }
1542   if (quality == 10) {
1543     /* TODO(eustas): Implement this direct path for all quality levels. */
1544     const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
1545                                        BROTLI_MAX(int, 16, lgwin));
1546     int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
1547                                            encoded_size, encoded_buffer);
1548     if (!ok || (max_out_size && *encoded_size > max_out_size)) {
1549       goto fallback;
1550     }
1551     return BROTLI_TRUE;
1552   }
1553 
1554   s = BrotliEncoderCreateInstance(0, 0, 0);
1555   if (!s) {
1556     return BROTLI_FALSE;
1557   } else {
1558     size_t available_in = input_size;
1559     const uint8_t* next_in = input_buffer;
1560     size_t available_out = *encoded_size;
1561     uint8_t* next_out = encoded_buffer;
1562     size_t total_out = 0;
1563     BROTLI_BOOL result = BROTLI_FALSE;
1564     BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
1565     BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
1566     BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
1567     BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
1568     if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1569       BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
1570     }
1571     result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
1572         &available_in, &next_in, &available_out, &next_out, &total_out);
1573     if (!BrotliEncoderIsFinished(s)) result = 0;
1574     *encoded_size = total_out;
1575     BrotliEncoderDestroyInstance(s);
1576     if (!result || (max_out_size && *encoded_size > max_out_size)) {
1577       goto fallback;
1578     }
1579     return BROTLI_TRUE;
1580   }
1581 fallback:
1582   *encoded_size = 0;
1583   if (!max_out_size) return BROTLI_FALSE;
1584   if (out_size >= max_out_size) {
1585     *encoded_size =
1586         MakeUncompressedStream(input_start, input_size, output_start);
1587     return BROTLI_TRUE;
1588   }
1589   return BROTLI_FALSE;
1590 }
1591 
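/* Illustrative caller-side sketch (not part of this file): a minimal one-shot
   compression call using only the public API from <brotli/encode.h>. The
   names |data|, |data_size| and |encoded| are hypothetical caller-owned
   buffers.

     size_t encoded_size = BrotliEncoderMaxCompressedSize(data_size);
     uint8_t* encoded = (uint8_t*)malloc(encoded_size);
     if (encoded != NULL &&
         BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW,
                               BROTLI_MODE_GENERIC, data_size, data,
                               &encoded_size, encoded)) {
       (on success, encoded_size holds the number of bytes written)
     }
*/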
1592 static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1593   uint32_t seal = s->last_bytes_;
1594   size_t seal_bits = s->last_bytes_bits_;
1595   uint8_t* destination;
1596   s->last_bytes_ = 0;
1597   s->last_bytes_bits_ = 0;
1598   /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
1599   seal |= 0x6u << seal_bits;
1600   seal_bits += 6;
1601   /* If we have already created storage, then append to it.
1602      Storage is valid until the next block is compressed. */
1603   if (s->next_out_) {
1604     destination = s->next_out_ + s->available_out_;
1605   } else {
1606     destination = s->tiny_buf_.u8;
1607     s->next_out_ = destination;
1608   }
1609   destination[0] = (uint8_t)seal;
1610   if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
1611   if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
1612   s->available_out_ += (seal_bits + 7) >> 3;
1613 }
1614 
1615 /* Injects padding bits or pushes compressed data to output.
1616    Returns false if nothing is done. */
1617 static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1618     size_t* available_out, uint8_t** next_out, size_t* total_out) {
1619   if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1620       s->last_bytes_bits_ != 0) {
1621     InjectBytePaddingBlock(s);
1622     return BROTLI_TRUE;
1623   }
1624 
1625   if (s->available_out_ != 0 && *available_out != 0) {
1626     size_t copy_output_size =
1627         BROTLI_MIN(size_t, s->available_out_, *available_out);
1628     memcpy(*next_out, s->next_out_, copy_output_size);
1629     *next_out += copy_output_size;
1630     *available_out -= copy_output_size;
1631     s->next_out_ += copy_output_size;
1632     s->available_out_ -= copy_output_size;
1633     s->total_out_ += copy_output_size;
1634     if (total_out) *total_out = s->total_out_;
1635     return BROTLI_TRUE;
1636   }
1637 
1638   return BROTLI_FALSE;
1639 }
1640 
1641 static void CheckFlushComplete(BrotliEncoderState* s) {
1642   if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1643       s->available_out_ == 0) {
1644     s->stream_state_ = BROTLI_STREAM_PROCESSING;
1645     s->next_out_ = 0;
1646   }
1647 }
1648 
1649 static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1650     BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
1651     const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
1652     size_t* total_out) {
1653   const size_t block_size_limit = (size_t)1 << s->params.lgwin;
1654   const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
1655       BROTLI_MIN(size_t, *available_in, block_size_limit));
1656   uint32_t* tmp_command_buf = NULL;
1657   uint32_t* command_buf = NULL;
1658   uint8_t* tmp_literal_buf = NULL;
1659   uint8_t* literal_buf = NULL;
1660   MemoryManager* m = &s->memory_manager_;
1661   if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
1662       s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
1663     return BROTLI_FALSE;
1664   }
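  /* For the two-pass quality the command/literal scratch buffers are cached
     on the state as soon as a full-size block arrives; for smaller inputs
     temporary buffers sized to the input are allocated here and freed at the
     end of this call. */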
1665   if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1666     if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
1667       s->command_buf_ =
1668           BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
1669       s->literal_buf_ =
1670           BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
1671       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
1672           BROTLI_IS_NULL(s->literal_buf_)) {
1673         return BROTLI_FALSE;
1674       }
1675     }
1676     if (s->command_buf_) {
1677       command_buf = s->command_buf_;
1678       literal_buf = s->literal_buf_;
1679     } else {
1680       tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
1681       tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
1682       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
1683           BROTLI_IS_NULL(tmp_literal_buf)) {
1684         return BROTLI_FALSE;
1685       }
1686       command_buf = tmp_command_buf;
1687       literal_buf = tmp_literal_buf;
1688     }
1689   }
1690 
1691   while (BROTLI_TRUE) {
1692     if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1693       continue;
1694     }
1695 
1696     /* Compress block only when internal output buffer is empty, stream is not
1697        finished, there is no pending flush request, and there is either
1698        additional input or pending operation. */
1699     if (s->available_out_ == 0 &&
1700         s->stream_state_ == BROTLI_STREAM_PROCESSING &&
1701         (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
1702       size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
1703       BROTLI_BOOL is_last =
1704           (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
1705       BROTLI_BOOL force_flush =
1706           (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
1707       size_t max_out_size = 2 * block_size + 503;
1708       BROTLI_BOOL inplace = BROTLI_TRUE;
1709       uint8_t* storage = NULL;
1710       size_t storage_ix = s->last_bytes_bits_;
1711       size_t table_size;
1712       int* table;
1713 
1714       if (force_flush && block_size == 0) {
1715         s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1716         continue;
1717       }
1718       if (max_out_size <= *available_out) {
1719         storage = *next_out;
1720       } else {
1721         inplace = BROTLI_FALSE;
1722         storage = GetBrotliStorage(s, max_out_size);
1723         if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1724       }
1725       storage[0] = (uint8_t)s->last_bytes_;
1726       storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1727       table = GetHashTable(s, s->params.quality, block_size, &table_size);
1728       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1729 
1730       if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
1731         BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
1732             is_last, table, table_size, &storage_ix, storage);
1733         if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1734       } else {
1735         BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
1736             is_last, command_buf, literal_buf, table, table_size,
1737             &storage_ix, storage);
1738         if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1739       }
1740       if (block_size != 0) {
1741         *next_in += block_size;
1742         *available_in -= block_size;
1743       }
1744       if (inplace) {
1745         size_t out_bytes = storage_ix >> 3;
1746         BROTLI_DCHECK(out_bytes <= *available_out);
1747         BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
1748         *next_out += out_bytes;
1749         *available_out -= out_bytes;
1750         s->total_out_ += out_bytes;
1751         if (total_out) *total_out = s->total_out_;
1752       } else {
1753         size_t out_bytes = storage_ix >> 3;
1754         s->next_out_ = storage;
1755         s->available_out_ = out_bytes;
1756       }
1757       s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1758       s->last_bytes_bits_ = storage_ix & 7u;
1759 
1760       if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1761       if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
1762       continue;
1763     }
1764     break;
1765   }
1766   BROTLI_FREE(m, tmp_command_buf);
1767   BROTLI_FREE(m, tmp_literal_buf);
1768   CheckFlushComplete(s);
1769   return BROTLI_TRUE;
1770 }
1771 
1772 static BROTLI_BOOL ProcessMetadata(
1773     BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
1774     size_t* available_out, uint8_t** next_out, size_t* total_out) {
1775   if (*available_in > (1u << 24)) return BROTLI_FALSE;
1776   /* Switch to metadata block workflow, if required. */
1777   if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1778     s->remaining_metadata_bytes_ = (uint32_t)*available_in;
1779     s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
1780   }
1781   if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
1782       s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
1783     return BROTLI_FALSE;
1784   }
1785 
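  /* Metadata workflow: pending compressed data is flushed first, then the
     METADATA_HEAD state emits the metadata header via the tiny_buf_ scratch
     buffer, and the METADATA_BODY state copies the raw metadata bytes either
     directly into the caller's output or, when no output space is available,
     through tiny_buf_ in chunks of at most 16 bytes. */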
1786   while (BROTLI_TRUE) {
1787     if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1788       continue;
1789     }
1790     if (s->available_out_ != 0) break;
1791 
1792     if (s->input_pos_ != s->last_flush_pos_) {
1793       BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
1794           &s->available_out_, &s->next_out_);
1795       if (!result) return BROTLI_FALSE;
1796       continue;
1797     }
1798 
1799     if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
1800       s->next_out_ = s->tiny_buf_.u8;
1801       s->available_out_ =
1802           WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
1803       s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
1804       continue;
1805     } else {
1806       /* Exit the workflow only when there is no more input and no more output.
1807          Otherwise the client may keep producing empty metadata blocks. */
1808       if (s->remaining_metadata_bytes_ == 0) {
1809         s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
1810         s->stream_state_ = BROTLI_STREAM_PROCESSING;
1811         break;
1812       }
1813       if (*available_out) {
1814         /* Directly copy input to output. */
1815         uint32_t copy = (uint32_t)BROTLI_MIN(
1816             size_t, s->remaining_metadata_bytes_, *available_out);
1817         memcpy(*next_out, *next_in, copy);
1818         *next_in += copy;
1819         *available_in -= copy;
1820         s->remaining_metadata_bytes_ -= copy;
1821         *next_out += copy;
1822         *available_out -= copy;
1823       } else {
1824         /* This guarantees progress in "TakeOutput" workflow. */
1825         uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
1826         s->next_out_ = s->tiny_buf_.u8;
1827         memcpy(s->next_out_, *next_in, copy);
1828         *next_in += copy;
1829         *available_in -= copy;
1830         s->remaining_metadata_bytes_ -= copy;
1831         s->available_out_ = copy;
1832       }
1833       continue;
1834     }
1835   }
1836 
1837   return BROTLI_TRUE;
1838 }
1839 
1840 static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
1841   if (s->params.size_hint == 0) {
1842     uint64_t delta = UnprocessedInputSize(s);
1843     uint64_t tail = available_in;
1844     uint32_t limit = 1u << 30;
1845     uint32_t total;
1846     if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
1847       total = limit;
1848     } else {
1849       total = (uint32_t)(delta + tail);
1850     }
1851     s->params.size_hint = total;
1852   }
1853 }
1854 
1855 BROTLI_BOOL BrotliEncoderCompressStream(
1856     BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
1857     const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
1858     size_t* total_out) {
1859   if (!EnsureInitialized(s)) return BROTLI_FALSE;
1860 
1861   /* Unfinished metadata block; check requirements. */
1862   if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
1863     if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
1864     if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
1865   }
1866 
1867   if (op == BROTLI_OPERATION_EMIT_METADATA) {
1868     UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
1869     return ProcessMetadata(
1870         s, available_in, next_in, available_out, next_out, total_out);
1871   }
1872 
1873   if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
1874       s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
1875     return BROTLI_FALSE;
1876   }
1877 
1878   if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
1879     return BROTLI_FALSE;
1880   }
1881   if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
1882       s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1883     return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
1884         available_out, next_out, total_out);
1885   }
1886   while (BROTLI_TRUE) {
1887     size_t remaining_block_size = RemainingInputBlockSize(s);
1888     /* Shorten input to flint size. */
1889     if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
1890       remaining_block_size = (size_t)s->flint_;
1891     }
1892 
1893     if (remaining_block_size != 0 && *available_in != 0) {
1894       size_t copy_input_size =
1895           BROTLI_MIN(size_t, remaining_block_size, *available_in);
1896       CopyInputToRingBuffer(s, copy_input_size, *next_in);
1897       *next_in += copy_input_size;
1898       *available_in -= copy_input_size;
1899       if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
1900       continue;
1901     }
1902 
1903     if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1904       /* Exit the "emit flint" workflow. */
1905       if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
1906         CheckFlushComplete(s);
1907         if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1908           s->flint_ = BROTLI_FLINT_DONE;
1909         }
1910       }
1911       continue;
1912     }
1913 
1914     /* Compress data only when internal output buffer is empty, stream is not
1915        finished and there is no pending flush request. */
1916     if (s->available_out_ == 0 &&
1917         s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1918       if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
1919         BROTLI_BOOL is_last = TO_BROTLI_BOOL(
1920             (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
1921         BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
1922             (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
1923         BROTLI_BOOL result;
1924         /* Force emitting (uncompressed) piece containing flint. */
1925         if (!is_last && s->flint_ == 0) {
1926           s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
1927           force_flush = BROTLI_TRUE;
1928         }
1929         UpdateSizeHint(s, *available_in);
1930         result = EncodeData(s, is_last, force_flush,
1931             &s->available_out_, &s->next_out_);
1932         if (!result) return BROTLI_FALSE;
1933         if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1934         if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
1935         continue;
1936       }
1937     }
1938     break;
1939   }
1940   CheckFlushComplete(s);
1941   return BROTLI_TRUE;
1942 }
1943 
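/* Illustrative caller-side sketch (not part of this file): the usual loop
   that drives the streaming API until the whole input is consumed and the
   stream is finished. The names |in|, |in_size| and |out| are hypothetical
   caller buffers; |s| is a BrotliEncoderState created with
   BrotliEncoderCreateInstance.

     size_t available_in = in_size;
     const uint8_t* next_in = in;
     while (BROTLI_TRUE) {
       size_t available_out = sizeof(out);
       uint8_t* next_out = out;
       if (!BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
                                        &available_in, &next_in,
                                        &available_out, &next_out, NULL)) {
         break;  (error)
       }
       (consume sizeof(out) - available_out bytes from |out| here)
       if (BrotliEncoderIsFinished(s)) break;
     }
*/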
1944 BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
1945   return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
1946       !BrotliEncoderHasMoreOutput(s));
1947 }
1948 
1949 BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
1950   return TO_BROTLI_BOOL(s->available_out_ != 0);
1951 }
1952 
1953 const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
1954   size_t consumed_size = s->available_out_;
1955   uint8_t* result = s->next_out_;
1956   if (*size) {
1957     consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
1958   }
1959   if (consumed_size) {
1960     s->next_out_ += consumed_size;
1961     s->available_out_ -= consumed_size;
1962     s->total_out_ += consumed_size;
1963     CheckFlushComplete(s);
1964     *size = consumed_size;
1965   } else {
1966     *size = 0;
1967     result = 0;
1968   }
1969   return result;
1970 }
1971 
1972 uint32_t BrotliEncoderVersion(void) {
1973   return BROTLI_VERSION;
1974 }
1975 
1976 BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
1977     BrotliSharedDictionaryType type, size_t size,
1978     const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
1979     brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
1980   ManagedDictionary* managed_dictionary = NULL;
1981   if (type != BROTLI_SHARED_DICTIONARY_RAW &&
1982       type != BROTLI_SHARED_DICTIONARY_SERIALIZED) {
1983     return NULL;
1984   }
1985   managed_dictionary =
1986       BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
1987   if (managed_dictionary == NULL) {
1988     return NULL;
1989   }
1990   if (type == BROTLI_SHARED_DICTIONARY_RAW) {
1991     managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
1992         &managed_dictionary->memory_manager_, data, size);
1993   } else {
1994     SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
1995         &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
1996     managed_dictionary->dictionary = (uint32_t*)dict;
1997     if (dict != NULL) {
1998       BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
1999           &managed_dictionary->memory_manager_, data, size, quality, dict);
2000       if (!ok) {
2001         BrotliFree(&managed_dictionary->memory_manager_, dict);
2002         managed_dictionary->dictionary = NULL;
2003       }
2004     }
2005   }
2006   if (managed_dictionary->dictionary == NULL) {
2007     BrotliDestroyManagedDictionary(managed_dictionary);
2008     return NULL;
2009   }
2010   return (BrotliEncoderPreparedDictionary*)managed_dictionary;
2011 }
2012 
2013 void BrotliEncoderDestroyPreparedDictionary(
2014     BrotliEncoderPreparedDictionary* dictionary) {
2015   ManagedDictionary* dict = (ManagedDictionary*)dictionary;
2016   if (!dictionary) return;
2017   /* First field of dictionary structs. */
2018   /* Only managed dictionaries are eligible for destruction by this method. */
2019   if (dict->magic != kManagedDictionaryMagic) {
2020     return;
2021   }
2022   if (dict->dictionary == NULL) {
2023     /* This should never ever happen. */
2024   } else if (*dict->dictionary == kPreparedDictionaryMagic) {
2025     DestroyPreparedDictionary(
2026         &dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
2027   } else if (*dict->dictionary == kSharedDictionaryMagic) {
2028     BrotliCleanupSharedEncoderDictionary(&dict->memory_manager_,
2029         (SharedEncoderDictionary*)dict->dictionary);
2030     BrotliFree(&dict->memory_manager_, dict->dictionary);
2031   } else {
2032     /* This should never ever happen. */
2033   }
2034   dict->dictionary = NULL;
2035   BrotliDestroyManagedDictionary(dict);
2036 }
2037 
2038 BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
2039     const BrotliEncoderPreparedDictionary* dictionary) {
2040   /* First field of dictionary structs */
2041   const BrotliEncoderPreparedDictionary* dict = dictionary;
2042   uint32_t magic = *((const uint32_t*)dict);
2043   SharedEncoderDictionary* current = NULL;
2044   if (magic == kManagedDictionaryMagic) {
2045     /* Unwrap managed dictionary. */
2046     ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
2047     magic = *managed_dictionary->dictionary;
2048     dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
2049   }
2050   current = &state->params.dictionary;
2051   if (magic == kPreparedDictionaryMagic) {
2052     const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
2053     if (!AttachPreparedDictionary(&current->compound, prepared)) {
2054       return BROTLI_FALSE;
2055     }
2056   } else if (magic == kSharedDictionaryMagic) {
2057     const SharedEncoderDictionary* attached =
2058         (const SharedEncoderDictionary*)dict;
2059     BROTLI_BOOL was_default = !current->contextual.context_based &&
2060         current->contextual.num_dictionaries == 1 &&
2061         current->contextual.dict[0]->hash_table_words ==
2062         kStaticDictionaryHashWords &&
2063         current->contextual.dict[0]->hash_table_lengths ==
2064         kStaticDictionaryHashLengths;
2065     BROTLI_BOOL new_default = !attached->contextual.context_based &&
2066         attached->contextual.num_dictionaries == 1 &&
2067         attached->contextual.dict[0]->hash_table_words ==
2068         kStaticDictionaryHashWords &&
2069         attached->contextual.dict[0]->hash_table_lengths ==
2070         kStaticDictionaryHashLengths;
2071     size_t i;
2072     if (state->is_initialized_) return BROTLI_FALSE;
2073     current->max_quality =
2074         BROTLI_MIN(int, current->max_quality, attached->max_quality);
2075     for (i = 0; i < attached->compound.num_chunks; i++) {
2076       if (!AttachPreparedDictionary(&current->compound,
2077           attached->compound.chunks[i])) {
2078         return BROTLI_FALSE;
2079       }
2080     }
2081     if (!new_default) {
2082       if (!was_default) return BROTLI_FALSE;
2083       /* Copy by value, but then set num_instances_ to 0 because the
2084          instances' memory is managed by |attached|, not by |current|. */
2085       current->contextual = attached->contextual;
2086       current->contextual.num_instances_ = 0;
2087     }
2088   } else {
2089     return BROTLI_FALSE;
2090   }
2091   return BROTLI_TRUE;
2092 }
2093 
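/* Illustrative caller-side sketch (not part of this file): preparing a raw
   LZ77 dictionary and attaching it to an encoder before compression starts.
   The names |dict_data| and |dict_size| are hypothetical; the prepared
   dictionary must remain alive for as long as the encoder uses it.

     BrotliEncoderPreparedDictionary* prepared = BrotliEncoderPrepareDictionary(
         BROTLI_SHARED_DICTIONARY_RAW, dict_size, dict_data, BROTLI_MAX_QUALITY,
         NULL, NULL, NULL);
     if (prepared != NULL &&
         BrotliEncoderAttachPreparedDictionary(s, prepared)) {
       (compress with |s| as usual)
     }
     BrotliEncoderDestroyPreparedDictionary(prepared);
*/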
2094 size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
2095                                             size_t input_size) {
2096   BrotliEncoderParams params;
2097   BrotliEncoderInitParams(&params);
2098   params.quality = quality;
2099   params.lgwin = lgwin;
2100   params.size_hint = input_size;
2101   SanitizeParams(&params);
2102   params.lgblock = ComputeLgBlock(&params);
2103   ChooseHasher(&params, &params.hasher);
2104   if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
2105       params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
2106     size_t state_size = sizeof(BrotliEncoderState);
2107     size_t block_size = BROTLI_MIN(size_t, input_size, (1ul << params.lgwin));
2108     size_t hash_table_size =
2109         HashTableSize(MaxHashTableSize(params.quality), block_size);
2110     size_t hash_size =
2111         (hash_table_size < (1u << 10)) ? 0 : sizeof(int) * hash_table_size;
2112     size_t cmdbuf_size = params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY ?
2113         5 * BROTLI_MIN(size_t, block_size, 1ul << 17) : 0;
2114     if (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
2115       state_size += sizeof(BrotliOnePassArena);
2116     } else {
2117       state_size += sizeof(BrotliTwoPassArena);
2118     }
2119     return hash_size + cmdbuf_size + state_size;
2120   } else {
2121     size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
2122     int ringbuffer_bits = ComputeRbBits(&params);
2123     size_t ringbuffer_size = input_size < short_ringbuffer_size ?
2124         input_size : (1u << ringbuffer_bits) + short_ringbuffer_size;
2125     size_t hash_size[4] = {0};
2126     size_t metablock_size =
2127         BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
2128     size_t inputblock_size =
2129         BROTLI_MIN(size_t, input_size, (size_t)1 << params.lgblock);
2130     size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
2131     size_t outbuf_size = metablock_size * 2 + 503;
2132     size_t histogram_size = 0;
2133     HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
2134     if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
2135       cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
2136           MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
2137     }
2138     if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
2139       /* Only a very rough estimation, based on enwik8. */
2140       histogram_size = 200 << 20;
2141     } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
2142       size_t literal_histograms =
2143           BROTLI_MIN(size_t, metablock_size / 6144, 256);
2144       size_t command_histograms =
2145           BROTLI_MIN(size_t, metablock_size / 6144, 256);
2146       size_t distance_histograms =
2147           BROTLI_MIN(size_t, metablock_size / 6144, 256);
2148       histogram_size = literal_histograms * sizeof(HistogramLiteral) +
2149                        command_histograms * sizeof(HistogramCommand) +
2150                        distance_histograms * sizeof(HistogramDistance);
2151     }
2152     return (ringbuffer_size +
2153             hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
2154             cmdbuf_size +
2155             outbuf_size +
2156             histogram_size);
2157   }
2158 }
2159 size_t BrotliEncoderGetPreparedDictionarySize(
2160     const BrotliEncoderPreparedDictionary* prepared_dictionary) {
2161   /* First field of dictionary structs */
2162   const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
2163   uint32_t magic = *((const uint32_t*)prepared);
2164   size_t overhead = 0;
2165   if (magic == kManagedDictionaryMagic) {
2166     const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
2167     overhead = sizeof(ManagedDictionary);
2168     magic = *managed->dictionary;
2169     prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
2170   }
2171 
2172   if (magic == kPreparedDictionaryMagic) {
2173     const PreparedDictionary* dictionary =
2174         (const PreparedDictionary*)prepared;
2175     /* Keep in sync with step 3 of CreatePreparedDictionary */
2176     return sizeof(PreparedDictionary) + dictionary->source_size +
2177         (sizeof(uint32_t) << dictionary->slot_bits) +
2178         (sizeof(uint16_t) << dictionary->bucket_bits) +
2179         (sizeof(uint32_t) * dictionary->source_offset) + overhead;
2180   } else if (magic == kSharedDictionaryMagic) {
2181     const SharedEncoderDictionary* dictionary =
2182         (const SharedEncoderDictionary*)prepared;
2183     const CompoundDictionary* compound = &dictionary->compound;
2184     const ContextualEncoderDictionary* contextual = &dictionary->contextual;
2185     size_t result = sizeof(*dictionary);
2186     size_t i;
2187     size_t num_instances;
2188     const BrotliEncoderDictionary* instances;
2189     for (i = 0; i < compound->num_prepared_instances_; i++) {
2190       size_t size = BrotliEncoderGetPreparedDictionarySize(
2191           (const BrotliEncoderPreparedDictionary*)
2192           compound->prepared_instances_[i]);
2193       if (!size) return 0;  /* error */
2194       result += size;
2195     }
2196     if (contextual->context_based) {
2197       num_instances = contextual->num_instances_;
2198       instances = contextual->instances_;
2199       result += sizeof(*instances) * num_instances;
2200     } else {
2201       num_instances = 1;
2202       instances = &contextual->instance_;
2203     }
2204     for (i = 0; i < num_instances; i++) {
2205       const BrotliEncoderDictionary* dict = &instances[i];
2206       result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
2207       if (dict->hash_table_data_words_) {
2208         result += sizeof(kStaticDictionaryHashWords);
2209       }
2210       if (dict->hash_table_data_lengths_) {
2211         result += sizeof(kStaticDictionaryHashLengths);
2212       }
2213       if (dict->buckets_data_) {
2214         result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
2215       }
2216       if (dict->dict_words_data_) {
2217         result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
2218       }
2219       if (dict->words_instance_) {
2220         result += sizeof(*dict->words_instance_);
2221         /* data_size not added here: it is never allocated by the
2222            SharedEncoderDictionary, instead it always points to the file
2223            already loaded in memory. So if the caller wants to include
2224            this memory as well, add the size of the loaded dictionary
2225            file to this. */
2226       }
2227     }
2228     return result + overhead;
2229   }
2230   return 0;  /* error */
2231 }
2232 
2233 #if defined(BROTLI_TEST)
2234 size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
2235 size_t MakeUncompressedStreamForTest(
2236     const uint8_t* input, size_t input_size, uint8_t* output) {
2237   return MakeUncompressedStream(input, input_size, output);
2238 }
2239 #endif
2240 
2241 #if defined(__cplusplus) || defined(c_plusplus)
2242 }  /* extern "C" */
2243 #endif
2244