1 /* Copyright 2017 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 #include "compound_dictionary.h"
8
9 #include "../common/platform.h"
10 #include <brotli/types.h>
11 #include "memory.h"
12 #include "quality.h"
13
CreatePreparedDictionaryWithParams(MemoryManager * m,const uint8_t * source,size_t source_size,uint32_t bucket_bits,uint32_t slot_bits,uint32_t hash_bits,uint16_t bucket_limit)14 static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
15 const uint8_t* source, size_t source_size, uint32_t bucket_bits,
16 uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
17 /* Step 1: create "bloated" hasher. */
18 uint32_t num_slots = 1u << slot_bits;
19 uint32_t num_buckets = 1u << bucket_bits;
20 uint32_t hash_shift = 64u - bucket_bits;
21 uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
22 uint32_t slot_mask = num_slots - 1;
23 size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
24 (sizeof(uint32_t) << slot_bits) +
25 (sizeof(uint16_t) << bucket_bits) +
26 (sizeof(uint32_t) << bucket_bits) +
27 (sizeof(uint32_t) * source_size);
28 uint8_t* flat = NULL;
29 PreparedDictionary* result = NULL;
30 uint16_t* num = NULL;
31 uint32_t* bucket_heads = NULL;
32 uint32_t* next_bucket = NULL;
33 uint32_t* slot_offsets = NULL;
34 uint16_t* heads = NULL;
35 uint32_t* items = NULL;
36 uint8_t* source_copy = NULL;
37 uint32_t i;
38 uint32_t* slot_size = NULL;
39 uint32_t* slot_limit = NULL;
40 uint32_t total_items = 0;
41 if (slot_bits > 16) return NULL;
42 if (slot_bits > bucket_bits) return NULL;
43 if (bucket_bits - slot_bits >= 16) return NULL;
44
45 flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
46 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
47
48 slot_size = (uint32_t*)flat;
49 slot_limit = (uint32_t*)(&slot_size[num_slots]);
50 num = (uint16_t*)(&slot_limit[num_slots]);
51 bucket_heads = (uint32_t*)(&num[num_buckets]);
52 next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
53 memset(num, 0, num_buckets * sizeof(num[0]));
54
55 /* TODO(eustas): apply custom "store" order. */
56 for (i = 0; i + 7 < source_size; ++i) {
57 const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
58 kPreparedDictionaryHashMul64Long;
59 const uint32_t key = (uint32_t)(h >> hash_shift);
60 uint16_t count = num[key];
61 next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
62 bucket_heads[key] = i;
63 count++;
64 if (count > bucket_limit) count = bucket_limit;
65 num[key] = count;
66 }
67
68 /* Step 2: find slot limits. */
69 for (i = 0; i < num_slots; ++i) {
70 BROTLI_BOOL overflow = BROTLI_FALSE;
71 slot_limit[i] = bucket_limit;
72 while (BROTLI_TRUE) {
73 uint32_t limit = slot_limit[i];
74 size_t j;
75 uint32_t count = 0;
76 overflow = BROTLI_FALSE;
77 for (j = i; j < num_buckets; j += num_slots) {
78 uint32_t size = num[j];
79 /* Last chain may span behind 64K limit; overflow happens only if
80 we are about to use 0xFFFF+ as item offset. */
81 if (count >= 0xFFFF) {
82 overflow = BROTLI_TRUE;
83 break;
84 }
85 if (size > limit) size = limit;
86 count += size;
87 }
88 if (!overflow) {
89 slot_size[i] = count;
90 total_items += count;
91 break;
92 }
93 slot_limit[i]--;
94 }
95 }
96
97 /* Step 3: transfer data to "slim" hasher. */
98 alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
99 (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
100 source_size;
101
102 result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
103 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
104 BROTLI_FREE(m, flat);
105 return NULL;
106 }
107 slot_offsets = (uint32_t*)(&result[1]);
108 heads = (uint16_t*)(&slot_offsets[num_slots]);
109 items = (uint32_t*)(&heads[num_buckets]);
110 source_copy = (uint8_t*)(&items[total_items]);
111
112 result->magic = kPreparedDictionaryMagic;
113 result->source_offset = total_items;
114 result->source_size = (uint32_t)source_size;
115 result->hash_bits = hash_bits;
116 result->bucket_bits = bucket_bits;
117 result->slot_bits = slot_bits;
118
119 total_items = 0;
120 for (i = 0; i < num_slots; ++i) {
121 slot_offsets[i] = total_items;
122 total_items += slot_size[i];
123 slot_size[i] = 0;
124 }
125 for (i = 0; i < num_buckets; ++i) {
126 uint32_t slot = i & slot_mask;
127 uint32_t count = num[i];
128 uint32_t pos;
129 size_t j;
130 size_t cursor = slot_size[slot];
131 if (count > slot_limit[slot]) count = slot_limit[slot];
132 if (count == 0) {
133 heads[i] = 0xFFFF;
134 continue;
135 }
136 heads[i] = (uint16_t)cursor;
137 cursor += slot_offsets[slot];
138 slot_size[slot] += count;
139 pos = bucket_heads[i];
140 for (j = 0; j < count; j++) {
141 items[cursor++] = pos;
142 pos = next_bucket[pos];
143 }
144 items[cursor - 1] |= 0x80000000;
145 }
146
147 BROTLI_FREE(m, flat);
148 memcpy(source_copy, source, source_size);
149 return result;
150 }
151
CreatePreparedDictionary(MemoryManager * m,const uint8_t * source,size_t source_size)152 PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
153 const uint8_t* source, size_t source_size) {
154 uint32_t bucket_bits = 17;
155 uint32_t slot_bits = 7;
156 uint32_t hash_bits = 40;
157 uint16_t bucket_limit = 32;
158 size_t volume = 16u << bucket_bits;
159 /* Tune parameters to fit dictionary size. */
160 while (volume < source_size && bucket_bits < 22) {
161 bucket_bits++;
162 slot_bits++;
163 volume <<= 1;
164 }
165 return CreatePreparedDictionaryWithParams(m,
166 source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
167 }
168
/* Releases |dictionary| through |m| using BROTLI_FREE.
   A NULL |dictionary| is accepted and ignored.
   NOTE(review): presumably |m| must be the manager the dictionary was
   created with - confirm against callers. */
void DestroyPreparedDictionary(MemoryManager* m,
    PreparedDictionary* dictionary) {
  if (dictionary != NULL) {
    BROTLI_FREE(m, dictionary);
  }
}
174
AttachPreparedDictionary(CompoundDictionary * compound,const PreparedDictionary * dictionary)175 BROTLI_BOOL AttachPreparedDictionary(
176 CompoundDictionary* compound, const PreparedDictionary* dictionary) {
177 size_t length = 0;
178 size_t index = 0;
179
180 if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
181 return BROTLI_FALSE;
182 }
183
184 if (!dictionary) return BROTLI_FALSE;
185
186 length = dictionary->source_size;
187 index = compound->num_chunks;
188 compound->total_size += length;
189 compound->chunks[index] = dictionary;
190 compound->chunk_offsets[index + 1] = compound->total_size;
191 {
192 uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
193 uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
194 uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
195 compound->chunk_source[index] =
196 (const uint8_t*)(&items[dictionary->source_offset]);
197 }
198 compound->num_chunks++;
199 return BROTLI_TRUE;
200 }
201