1 /* NOLINT(build/header_guard) */
2 /* Copyright 2015 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7
8 /* template parameters: FN */
9
/* Shorthand for the histogram type produced by the FN template parameter;
   used by every definition in this file. */
#define HistogramType FN(Histogram)
11
/* Greedy block splitter for one block category (literal, command or distance).
   Symbols are accumulated into a "current" histogram. Each time the target
   number of symbols has been collected, BlockSplitterFinishBlock decides
   whether they become a new block type, reuse the type of the second last
   block, or are merged into the last block. */
typedef struct FN(BlockSplitter) {
  /* Alphabet size of particular block category. */
  size_t alphabet_size_;
  /* We collect at least this many symbols for each block. */
  size_t min_block_size_;
  /* We merge histograms A and B if
       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
     where A is the current histogram and B is the histogram of the last or the
     second last block type. */
  double split_threshold_;

  /* Number of blocks emitted so far; also the index of the next free slot in
     split_->types and split_->lengths. */
  size_t num_blocks_;
  /* Block split being built: block types and lengths are written here. */
  BlockSplit* split_;  /* not owned */
  /* Histogram array allocated by InitBlockSplitter. The entry at
     curr_histogram_ix_ collects the symbols of the block in progress. */
  HistogramType* histograms_;  /* not owned */
  /* Receives the allocated number of histograms in InitBlockSplitter and the
     number of block types once the final block has been finished. */
  size_t* histograms_size_;  /* not owned */

  /* Temporary storage for BlockSplitterFinishBlock: the current histogram
     combined with the histogram of the last ([0]) and of the second last ([1])
     block type. */
  HistogramType combined_histo[2];

  /* The number of symbols that we want to collect before deciding on whether
     or not to merge the block with a previous one or emit a new block. */
  size_t target_block_size_;
  /* The number of symbols in the current histogram. */
  size_t block_size_;
  /* Offset of the current histogram (index into histograms_). */
  size_t curr_histogram_ix_;
  /* Offset of the histograms of the previous two block types
     ([0] is the last one, [1] the second last one). */
  size_t last_histogram_ix_[2];
  /* Entropy of the previous two block types, in the same order as
     last_histogram_ix_. */
  double last_entropy_[2];
  /* The number of times we merged the current block with the last one. */
  size_t merge_last_count_;
} FN(BlockSplitter);
47
FN(InitBlockSplitter)48 static void FN(InitBlockSplitter)(
49 MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
50 size_t min_block_size, double split_threshold, size_t num_symbols,
51 BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
52 size_t max_num_blocks = num_symbols / min_block_size + 1;
53 /* We have to allocate one more histogram than the maximum number of block
54 types for the current histogram when the meta-block is too big. */
55 size_t max_num_types =
56 BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
57 self->alphabet_size_ = alphabet_size;
58 self->min_block_size_ = min_block_size;
59 self->split_threshold_ = split_threshold;
60 self->num_blocks_ = 0;
61 self->split_ = split;
62 self->histograms_size_ = histograms_size;
63 self->target_block_size_ = min_block_size;
64 self->block_size_ = 0;
65 self->curr_histogram_ix_ = 0;
66 self->merge_last_count_ = 0;
67 BROTLI_ENSURE_CAPACITY(m, uint8_t,
68 split->types, split->types_alloc_size, max_num_blocks);
69 BROTLI_ENSURE_CAPACITY(m, uint32_t,
70 split->lengths, split->lengths_alloc_size, max_num_blocks);
71 if (BROTLI_IS_OOM(m)) return;
72 self->split_->num_blocks = max_num_blocks;
73 BROTLI_DCHECK(*histograms == 0);
74 *histograms_size = max_num_types;
75 *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
76 self->histograms_ = *histograms;
77 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
78 /* Clear only current histogram. */
79 FN(HistogramClear)(&self->histograms_[0]);
80 self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
81 }
82
83 /* Does either of three things:
84 (1) emits the current block with a new block type;
85 (2) emits the current block with the type of the second last block;
86 (3) merges the current block with the last block. */
FN(BlockSplitterFinishBlock)87 static void FN(BlockSplitterFinishBlock)(
88 FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
89 BlockSplit* split = self->split_;
90 double* last_entropy = self->last_entropy_;
91 HistogramType* histograms = self->histograms_;
92 self->block_size_ =
93 BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
94 if (self->num_blocks_ == 0) {
95 /* Create first block. */
96 split->lengths[0] = (uint32_t)self->block_size_;
97 split->types[0] = 0;
98 last_entropy[0] =
99 BitsEntropy(histograms[0].data_, self->alphabet_size_);
100 last_entropy[1] = last_entropy[0];
101 ++self->num_blocks_;
102 ++split->num_types;
103 ++self->curr_histogram_ix_;
104 if (self->curr_histogram_ix_ < *self->histograms_size_)
105 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
106 self->block_size_ = 0;
107 } else if (self->block_size_ > 0) {
108 double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
109 self->alphabet_size_);
110 double combined_entropy[2];
111 double diff[2];
112 size_t j;
113 for (j = 0; j < 2; ++j) {
114 size_t last_histogram_ix = self->last_histogram_ix_[j];
115 self->combined_histo[j] = histograms[self->curr_histogram_ix_];
116 FN(HistogramAddHistogram)(&self->combined_histo[j],
117 &histograms[last_histogram_ix]);
118 combined_entropy[j] = BitsEntropy(
119 &self->combined_histo[j].data_[0], self->alphabet_size_);
120 diff[j] = combined_entropy[j] - entropy - last_entropy[j];
121 }
122
123 if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
124 diff[0] > self->split_threshold_ &&
125 diff[1] > self->split_threshold_) {
126 /* Create new block. */
127 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
128 split->types[self->num_blocks_] = (uint8_t)split->num_types;
129 self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
130 self->last_histogram_ix_[0] = (uint8_t)split->num_types;
131 last_entropy[1] = last_entropy[0];
132 last_entropy[0] = entropy;
133 ++self->num_blocks_;
134 ++split->num_types;
135 ++self->curr_histogram_ix_;
136 if (self->curr_histogram_ix_ < *self->histograms_size_)
137 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
138 self->block_size_ = 0;
139 self->merge_last_count_ = 0;
140 self->target_block_size_ = self->min_block_size_;
141 } else if (diff[1] < diff[0] - 20.0) {
142 /* Combine this block with second last block. */
143 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
144 split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
145 BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
146 histograms[self->last_histogram_ix_[0]] = self->combined_histo[1];
147 last_entropy[1] = last_entropy[0];
148 last_entropy[0] = combined_entropy[1];
149 ++self->num_blocks_;
150 self->block_size_ = 0;
151 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
152 self->merge_last_count_ = 0;
153 self->target_block_size_ = self->min_block_size_;
154 } else {
155 /* Combine this block with last block. */
156 split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
157 histograms[self->last_histogram_ix_[0]] = self->combined_histo[0];
158 last_entropy[0] = combined_entropy[0];
159 if (split->num_types == 1) {
160 last_entropy[1] = last_entropy[0];
161 }
162 self->block_size_ = 0;
163 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
164 if (++self->merge_last_count_ > 1) {
165 self->target_block_size_ += self->min_block_size_;
166 }
167 }
168 }
169 if (is_final) {
170 *self->histograms_size_ = split->num_types;
171 split->num_blocks = self->num_blocks_;
172 }
173 }
174
175 /* Adds the next symbol to the current histogram. When the current histogram
176 reaches the target size, decides on merging the block. */
FN(BlockSplitterAddSymbol)177 static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
178 FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
179 ++self->block_size_;
180 if (self->block_size_ == self->target_block_size_) {
181 FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
182 }
183 }
184
/* Drop the shorthand defined at the top of this file so it does not stay
   defined after this file ends. */
#undef HistogramType
186