1 /* Copyright 2010 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* A (forgetful) hash table to the data seen by the compressor, to
8 help create backward references to previous data. */
9
10 #ifndef BROTLI_ENC_HASH_H_
11 #define BROTLI_ENC_HASH_H_
12
13 #include <stdlib.h> /* exit */
14 #include <string.h> /* memcmp, memset */
15
16 #include "../common/constants.h"
17 #include "../common/dictionary.h"
18 #include "../common/platform.h"
19 #include <brotli/types.h>
20 #include "encoder_dict.h"
21 #include "fast_log.h"
22 #include "find_match_length.h"
23 #include "memory.h"
24 #include "quality.h"
25 #include "static_dict.h"
26
27 #if defined(__cplusplus) || defined(c_plusplus)
28 extern "C" {
29 #endif
30
31 typedef struct {
32 /**
33 * Dynamically allocated areas; regular hasher uses one or two allocations;
34 * "composite" hasher uses up to 4 allocations.
35 */
36 void* extra[4];
37
38 /**
39 * False before the fisrt invocation of HasherSetup (where "extra" memory)
40 * is allocated.
41 */
42 BROTLI_BOOL is_setup_;
43
44 size_t dict_num_lookups;
45 size_t dict_num_matches;
46
47 BrotliHasherParams params;
48
49 /**
50 * False if hasher needs to be "prepared" before use (before the first
51 * invocation of HasherSetup or after HasherReset). "preparation" is hasher
52 * data initialization (using input ringbuffer).
53 */
54 BROTLI_BOOL is_prepared_;
55 } HasherCommon;
56
57 #define score_t size_t
58
59 static const uint32_t kCutoffTransformsCount = 10;
60 /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
61 /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
62 static const uint64_t kCutoffTransforms =
63 BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
64
65 typedef struct HasherSearchResult {
66 size_t len;
67 size_t distance;
68 score_t score;
69 int len_code_delta; /* == len_code - len */
70 } HasherSearchResult;
71
72 /* kHashMul32 multiplier has these properties:
73 * The multiplier must be odd. Otherwise we may lose the highest bit.
74 * No long streaks of ones or zeros.
75 * There is no effort to ensure that it is a prime, the oddity is enough
76 for this use.
77 * The number has been tuned heuristically against compression benchmarks. */
78 static const uint32_t kHashMul32 = 0x1E35A7BD;
79 static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
80 static const uint64_t kHashMul64Long =
81 BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
82
Hash14(const uint8_t * data)83 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
84 uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
85 /* The higher bits contain more mixture from the multiplication,
86 so we take our results from there. */
87 return h >> (32 - 14);
88 }
89
PrepareDistanceCache(int * BROTLI_RESTRICT distance_cache,const int num_distances)90 static BROTLI_INLINE void PrepareDistanceCache(
91 int* BROTLI_RESTRICT distance_cache, const int num_distances) {
92 if (num_distances > 4) {
93 int last_distance = distance_cache[0];
94 distance_cache[4] = last_distance - 1;
95 distance_cache[5] = last_distance + 1;
96 distance_cache[6] = last_distance - 2;
97 distance_cache[7] = last_distance + 2;
98 distance_cache[8] = last_distance - 3;
99 distance_cache[9] = last_distance + 3;
100 if (num_distances > 10) {
101 int next_last_distance = distance_cache[1];
102 distance_cache[10] = next_last_distance - 1;
103 distance_cache[11] = next_last_distance + 1;
104 distance_cache[12] = next_last_distance - 2;
105 distance_cache[13] = next_last_distance + 2;
106 distance_cache[14] = next_last_distance - 3;
107 distance_cache[15] = next_last_distance + 3;
108 }
109 }
110 }
111
112 #define BROTLI_LITERAL_BYTE_SCORE 135
113 #define BROTLI_DISTANCE_BIT_PENALTY 30
114 /* Score must be positive after applying maximal penalty. */
115 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
116
117 /* Usually, we always choose the longest backward reference. This function
118 allows for the exception of that rule.
119
120 If we choose a backward reference that is further away, it will
121 usually be coded with more bits. We approximate this by assuming
122 log2(distance). If the distance can be expressed in terms of the
123 last four distances, we use some heuristic constants to estimate
124 the bits cost. For the first up to four literals we use the bit
125 cost of the literals from the literal cost model, after that we
126 use the average bit cost of the cost model.
127
128 This function is used to sometimes discard a longer backward reference
129 when it is not much longer and the bit cost for encoding it is more
130 than the saved literals.
131
132 backward_reference_offset MUST be positive. */
BackwardReferenceScore(size_t copy_length,size_t backward_reference_offset)133 static BROTLI_INLINE score_t BackwardReferenceScore(
134 size_t copy_length, size_t backward_reference_offset) {
135 return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
136 BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
137 }
138
BackwardReferenceScoreUsingLastDistance(size_t copy_length)139 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
140 size_t copy_length) {
141 return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
142 BROTLI_SCORE_BASE + 15;
143 }
144
BackwardReferencePenaltyUsingLastDistance(size_t distance_short_code)145 static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
146 size_t distance_short_code) {
147 return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
148 }
149
TestStaticDictionaryItem(const BrotliEncoderDictionary * dictionary,size_t len,size_t word_idx,const uint8_t * data,size_t max_length,size_t max_backward,size_t max_distance,HasherSearchResult * out)150 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
151 const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
152 const uint8_t* data, size_t max_length, size_t max_backward,
153 size_t max_distance, HasherSearchResult* out) {
154 size_t offset;
155 size_t matchlen;
156 size_t backward;
157 score_t score;
158 offset = dictionary->words->offsets_by_length[len] + len * word_idx;
159 if (len > max_length) {
160 return BROTLI_FALSE;
161 }
162
163 matchlen =
164 FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
165 if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
166 return BROTLI_FALSE;
167 }
168 {
169 size_t cut = len - matchlen;
170 size_t transform_id = (cut << 2) +
171 (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
172 backward = max_backward + 1 + word_idx +
173 (transform_id << dictionary->words->size_bits_by_length[len]);
174 }
175 if (backward > max_distance) {
176 return BROTLI_FALSE;
177 }
178 score = BackwardReferenceScore(matchlen, backward);
179 if (score < out->score) {
180 return BROTLI_FALSE;
181 }
182 out->len = matchlen;
183 out->len_code_delta = (int)len - (int)matchlen;
184 out->distance = backward;
185 out->score = score;
186 return BROTLI_TRUE;
187 }
188
SearchInStaticDictionary(const BrotliEncoderDictionary * dictionary,HasherCommon * common,const uint8_t * data,size_t max_length,size_t max_backward,size_t max_distance,HasherSearchResult * out,BROTLI_BOOL shallow)189 static BROTLI_INLINE void SearchInStaticDictionary(
190 const BrotliEncoderDictionary* dictionary,
191 HasherCommon* common, const uint8_t* data, size_t max_length,
192 size_t max_backward, size_t max_distance,
193 HasherSearchResult* out, BROTLI_BOOL shallow) {
194 size_t key;
195 size_t i;
196 if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {
197 return;
198 }
199 key = Hash14(data) << 1;
200 for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
201 common->dict_num_lookups++;
202 if (dictionary->hash_table_lengths[key] != 0) {
203 BROTLI_BOOL item_matches = TestStaticDictionaryItem(
204 dictionary, dictionary->hash_table_lengths[key],
205 dictionary->hash_table_words[key], data,
206 max_length, max_backward, max_distance, out);
207 if (item_matches) {
208 common->dict_num_matches++;
209 }
210 }
211 }
212 }
213
214 typedef struct BackwardMatch {
215 uint32_t distance;
216 uint32_t length_and_code;
217 } BackwardMatch;
218
InitBackwardMatch(BackwardMatch * self,size_t dist,size_t len)219 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
220 size_t dist, size_t len) {
221 self->distance = (uint32_t)dist;
222 self->length_and_code = (uint32_t)(len << 5);
223 }
224
InitDictionaryBackwardMatch(BackwardMatch * self,size_t dist,size_t len,size_t len_code)225 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
226 size_t dist, size_t len, size_t len_code) {
227 self->distance = (uint32_t)dist;
228 self->length_and_code =
229 (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
230 }
231
BackwardMatchLength(const BackwardMatch * self)232 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
233 return self->length_and_code >> 5;
234 }
235
BackwardMatchLengthCode(const BackwardMatch * self)236 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
237 size_t code = self->length_and_code & 31;
238 return code ? code : BackwardMatchLength(self);
239 }
240
241 #define EXPAND_CAT(a, b) CAT(a, b)
242 #define CAT(a, b) a ## b
243 #define FN(X) EXPAND_CAT(X, HASHER())
244
245 #define HASHER() H10
246 #define BUCKET_BITS 17
247 #define MAX_TREE_SEARCH_DEPTH 64
248 #define MAX_TREE_COMP_LENGTH 128
249 #include "hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
250 #undef MAX_TREE_SEARCH_DEPTH
251 #undef MAX_TREE_COMP_LENGTH
252 #undef BUCKET_BITS
253 #undef HASHER
254 /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
255 #define MAX_NUM_MATCHES_H10 128
256
257 /* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
258 a little faster (0.5% - 1%) and it compresses 0.15% better on small text
259 and HTML inputs. */
260
261 #define HASHER() H2
262 #define BUCKET_BITS 16
263 #define BUCKET_SWEEP_BITS 0
264 #define HASH_LEN 5
265 #define USE_DICTIONARY 1
266 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
267 #undef BUCKET_SWEEP_BITS
268 #undef USE_DICTIONARY
269 #undef HASHER
270
271 #define HASHER() H3
272 #define BUCKET_SWEEP_BITS 1
273 #define USE_DICTIONARY 0
274 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
275 #undef USE_DICTIONARY
276 #undef BUCKET_SWEEP_BITS
277 #undef BUCKET_BITS
278 #undef HASHER
279
280 #define HASHER() H4
281 #define BUCKET_BITS 17
282 #define BUCKET_SWEEP_BITS 2
283 #define USE_DICTIONARY 1
284 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
285 #undef USE_DICTIONARY
286 #undef HASH_LEN
287 #undef BUCKET_SWEEP_BITS
288 #undef BUCKET_BITS
289 #undef HASHER
290
291 #define HASHER() H5
292 #include "hash_longest_match_inc.h" /* NOLINT(build/include) */
293 #undef HASHER
294
295 #define HASHER() H6
296 #include "hash_longest_match64_inc.h" /* NOLINT(build/include) */
297 #undef HASHER
298
299 #define BUCKET_BITS 15
300
301 #define NUM_LAST_DISTANCES_TO_CHECK 4
302 #define NUM_BANKS 1
303 #define BANK_BITS 16
304 #define HASHER() H40
305 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
306 #undef HASHER
307 #undef NUM_LAST_DISTANCES_TO_CHECK
308
309 #define NUM_LAST_DISTANCES_TO_CHECK 10
310 #define HASHER() H41
311 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
312 #undef HASHER
313 #undef NUM_LAST_DISTANCES_TO_CHECK
314 #undef NUM_BANKS
315 #undef BANK_BITS
316
317 #define NUM_LAST_DISTANCES_TO_CHECK 16
318 #define NUM_BANKS 512
319 #define BANK_BITS 9
320 #define HASHER() H42
321 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
322 #undef HASHER
323 #undef NUM_LAST_DISTANCES_TO_CHECK
324 #undef NUM_BANKS
325 #undef BANK_BITS
326
327 #undef BUCKET_BITS
328
329 #define HASHER() H54
330 #define BUCKET_BITS 20
331 #define BUCKET_SWEEP_BITS 2
332 #define HASH_LEN 7
333 #define USE_DICTIONARY 0
334 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
335 #undef USE_DICTIONARY
336 #undef HASH_LEN
337 #undef BUCKET_SWEEP_BITS
338 #undef BUCKET_BITS
339 #undef HASHER
340
341 /* fast large window hashers */
342
343 #define HASHER() HROLLING_FAST
344 #define CHUNKLEN 32
345 #define JUMP 4
346 #define NUMBUCKETS 16777216
347 #define MASK ((NUMBUCKETS * 64) - 1)
348 #include "hash_rolling_inc.h" /* NOLINT(build/include) */
349 #undef JUMP
350 #undef HASHER
351
352
353 #define HASHER() HROLLING
354 #define JUMP 1
355 #include "hash_rolling_inc.h" /* NOLINT(build/include) */
356 #undef MASK
357 #undef NUMBUCKETS
358 #undef JUMP
359 #undef CHUNKLEN
360 #undef HASHER
361
362 #define HASHER() H35
363 #define HASHER_A H3
364 #define HASHER_B HROLLING_FAST
365 #include "hash_composite_inc.h" /* NOLINT(build/include) */
366 #undef HASHER_A
367 #undef HASHER_B
368 #undef HASHER
369
370 #define HASHER() H55
371 #define HASHER_A H54
372 #define HASHER_B HROLLING_FAST
373 #include "hash_composite_inc.h" /* NOLINT(build/include) */
374 #undef HASHER_A
375 #undef HASHER_B
376 #undef HASHER
377
378 #define HASHER() H65
379 #define HASHER_A H6
380 #define HASHER_B HROLLING
381 #include "hash_composite_inc.h" /* NOLINT(build/include) */
382 #undef HASHER_A
383 #undef HASHER_B
384 #undef HASHER
385
386 #undef FN
387 #undef CAT
388 #undef EXPAND_CAT
389
390 #define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
391 #define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
392 #define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
393 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
394
395 typedef struct {
396 HasherCommon common;
397
398 union {
399 #define MEMBER_(N) \
400 H ## N _H ## N;
401 FOR_ALL_HASHERS(MEMBER_)
402 #undef MEMBER_
403 } privat;
404 } Hasher;
405
406 /* MUST be invoked before any other method. */
HasherInit(Hasher * hasher)407 static BROTLI_INLINE void HasherInit(Hasher* hasher) {
408 hasher->common.is_setup_ = BROTLI_FALSE;
409 hasher->common.extra[0] = NULL;
410 hasher->common.extra[1] = NULL;
411 hasher->common.extra[2] = NULL;
412 hasher->common.extra[3] = NULL;
413 }
414
DestroyHasher(MemoryManager * m,Hasher * hasher)415 static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
416 if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
417 if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
418 if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
419 if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
420 }
421
HasherReset(Hasher * hasher)422 static BROTLI_INLINE void HasherReset(Hasher* hasher) {
423 hasher->common.is_prepared_ = BROTLI_FALSE;
424 }
425
HasherSize(const BrotliEncoderParams * params,BROTLI_BOOL one_shot,const size_t input_size,size_t * alloc_size)426 static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
427 BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {
428 switch (params->hasher.type) {
429 #define SIZE_(N) \
430 case N: \
431 HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
432 break;
433 FOR_ALL_HASHERS(SIZE_)
434 #undef SIZE_
435 default:
436 break;
437 }
438 }
439
HasherSetup(MemoryManager * m,Hasher * hasher,BrotliEncoderParams * params,const uint8_t * data,size_t position,size_t input_size,BROTLI_BOOL is_last)440 static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
441 BrotliEncoderParams* params, const uint8_t* data, size_t position,
442 size_t input_size, BROTLI_BOOL is_last) {
443 BROTLI_BOOL one_shot = (position == 0 && is_last);
444 if (!hasher->common.is_setup_) {
445 size_t alloc_size[4] = {0};
446 size_t i;
447 ChooseHasher(params, ¶ms->hasher);
448 hasher->common.params = params->hasher;
449 hasher->common.dict_num_lookups = 0;
450 hasher->common.dict_num_matches = 0;
451 HasherSize(params, one_shot, input_size, alloc_size);
452 for (i = 0; i < 4; ++i) {
453 if (alloc_size[i] == 0) continue;
454 hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
455 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
456 }
457 switch (hasher->common.params.type) {
458 #define INITIALIZE_(N) \
459 case N: \
460 InitializeH ## N(&hasher->common, \
461 &hasher->privat._H ## N, params); \
462 break;
463 FOR_ALL_HASHERS(INITIALIZE_);
464 #undef INITIALIZE_
465 default:
466 break;
467 }
468 HasherReset(hasher);
469 hasher->common.is_setup_ = BROTLI_TRUE;
470 }
471
472 if (!hasher->common.is_prepared_) {
473 switch (hasher->common.params.type) {
474 #define PREPARE_(N) \
475 case N: \
476 PrepareH ## N( \
477 &hasher->privat._H ## N, \
478 one_shot, input_size, data); \
479 break;
480 FOR_ALL_HASHERS(PREPARE_)
481 #undef PREPARE_
482 default: break;
483 }
484 hasher->common.is_prepared_ = BROTLI_TRUE;
485 }
486 }
487
InitOrStitchToPreviousBlock(MemoryManager * m,Hasher * hasher,const uint8_t * data,size_t mask,BrotliEncoderParams * params,size_t position,size_t input_size,BROTLI_BOOL is_last)488 static BROTLI_INLINE void InitOrStitchToPreviousBlock(
489 MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
490 BrotliEncoderParams* params, size_t position, size_t input_size,
491 BROTLI_BOOL is_last) {
492 HasherSetup(m, hasher, params, data, position, input_size, is_last);
493 if (BROTLI_IS_OOM(m)) return;
494 switch (hasher->common.params.type) {
495 #define INIT_(N) \
496 case N: \
497 StitchToPreviousBlockH ## N( \
498 &hasher->privat._H ## N, \
499 input_size, position, data, mask); \
500 break;
501 FOR_ALL_HASHERS(INIT_)
502 #undef INIT_
503 default: break;
504 }
505 }
506
507 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
508 to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
FindCompoundDictionaryMatch(const PreparedDictionary * self,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const int * BROTLI_RESTRICT distance_cache,const size_t cur_ix,const size_t max_length,const size_t distance_offset,const size_t max_distance,HasherSearchResult * BROTLI_RESTRICT out)509 static BROTLI_INLINE void FindCompoundDictionaryMatch(
510 const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
511 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
512 const size_t cur_ix, const size_t max_length, const size_t distance_offset,
513 const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
514 const uint32_t source_offset = self->source_offset;
515 const uint32_t source_size = self->source_size;
516 const size_t boundary = distance_offset - source_size;
517 const uint32_t hash_bits = self->hash_bits;
518 const uint32_t bucket_bits = self->bucket_bits;
519 const uint32_t slot_bits = self->slot_bits;
520
521 const uint32_t hash_shift = 64u - bucket_bits;
522 const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
523 const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
524
525 const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526 const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527 const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
528 const uint8_t* source = (uint8_t*)(&items[source_offset]);
529
530 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
531 score_t best_score = out->score;
532 size_t best_len = out->len;
533 size_t i;
534 const uint64_t h =
535 (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
536 kPreparedDictionaryHashMul64Long;
537 const uint32_t key = (uint32_t)(h >> hash_shift);
538 const uint32_t slot = key & slot_mask;
539 const uint32_t head = heads[key];
540 const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
541 uint32_t item = (head == 0xFFFF) ? 1 : 0;
542 for (i = 0; i < 4; ++i) {
543 const size_t distance = (size_t)distance_cache[i];
544 size_t offset;
545 size_t limit;
546 size_t len;
547 if (distance <= boundary || distance > distance_offset) continue;
548 offset = distance_offset - distance;
549 limit = source_size - offset;
550 limit = limit > max_length ? max_length : limit;
551 len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
552 limit);
553 if (len >= 2) {
554 score_t score = BackwardReferenceScoreUsingLastDistance(len);
555 if (best_score < score) {
556 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
557 if (best_score < score) {
558 best_score = score;
559 if (len > best_len) best_len = len;
560 out->len = len;
561 out->len_code_delta = 0;
562 out->distance = distance;
563 out->score = best_score;
564 }
565 }
566 }
567 }
568 while (item == 0) {
569 size_t offset;
570 size_t distance;
571 size_t limit;
572 item = *chain;
573 chain++;
574 offset = item & 0x7FFFFFFF;
575 item &= 0x80000000;
576 distance = distance_offset - offset;
577 limit = source_size - offset;
578 limit = (limit > max_length) ? max_length : limit;
579 if (distance > max_distance) continue;
580 if (cur_ix_masked + best_len > ring_buffer_mask ||
581 best_len >= limit ||
582 data[cur_ix_masked + best_len] != source[offset + best_len]) {
583 continue;
584 }
585 {
586 const size_t len = FindMatchLengthWithLimit(&source[offset],
587 &data[cur_ix_masked],
588 limit);
589 if (len >= 4) {
590 score_t score = BackwardReferenceScore(len, distance);
591 if (best_score < score) {
592 best_score = score;
593 best_len = len;
594 out->len = best_len;
595 out->len_code_delta = 0;
596 out->distance = distance;
597 out->score = best_score;
598 }
599 }
600 }
601 }
602 }
603
604 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
605 to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
FindAllCompoundDictionaryMatches(const PreparedDictionary * self,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const size_t cur_ix,const size_t min_length,const size_t max_length,const size_t distance_offset,const size_t max_distance,BackwardMatch * matches,size_t match_limit)606 static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
607 const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
608 const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
609 const size_t max_length, const size_t distance_offset,
610 const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
611 const uint32_t source_offset = self->source_offset;
612 const uint32_t source_size = self->source_size;
613 const uint32_t hash_bits = self->hash_bits;
614 const uint32_t bucket_bits = self->bucket_bits;
615 const uint32_t slot_bits = self->slot_bits;
616
617 const uint32_t hash_shift = 64u - bucket_bits;
618 const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
619 const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
620
621 const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
622 const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
623 const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
624 const uint8_t* source = (uint8_t*)(&items[source_offset]);
625
626 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
627 size_t best_len = min_length;
628 const uint64_t h =
629 (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
630 kPreparedDictionaryHashMul64Long;
631 const uint32_t key = (uint32_t)(h >> hash_shift);
632 const uint32_t slot = key & slot_mask;
633 const uint32_t head = heads[key];
634 const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
635 uint32_t item = (head == 0xFFFF) ? 1 : 0;
636 size_t found = 0;
637 while (item == 0) {
638 size_t offset;
639 size_t distance;
640 size_t limit;
641 size_t len;
642 item = *chain;
643 chain++;
644 offset = item & 0x7FFFFFFF;
645 item &= 0x80000000;
646 distance = distance_offset - offset;
647 limit = source_size - offset;
648 limit = (limit > max_length) ? max_length : limit;
649 if (distance > max_distance) continue;
650 if (cur_ix_masked + best_len > ring_buffer_mask ||
651 best_len >= limit ||
652 data[cur_ix_masked + best_len] != source[offset + best_len]) {
653 continue;
654 }
655 len = FindMatchLengthWithLimit(
656 &source[offset], &data[cur_ix_masked], limit);
657 if (len > best_len) {
658 best_len = len;
659 InitBackwardMatch(matches++, distance, len);
660 found++;
661 if (found == match_limit) break;
662 }
663 }
664 return found;
665 }
666
LookupCompoundDictionaryMatch(const CompoundDictionary * addon,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const int * BROTLI_RESTRICT distance_cache,const size_t cur_ix,const size_t max_length,const size_t max_ring_buffer_distance,const size_t max_distance,HasherSearchResult * sr)667 static BROTLI_INLINE void LookupCompoundDictionaryMatch(
668 const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
669 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
670 const size_t cur_ix, const size_t max_length,
671 const size_t max_ring_buffer_distance, const size_t max_distance,
672 HasherSearchResult* sr) {
673 size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
674 size_t d;
675 for (d = 0; d < addon->num_chunks; ++d) {
676 /* Only one prepared dictionary type is currently supported. */
677 FindCompoundDictionaryMatch(
678 (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
679 distance_cache, cur_ix, max_length,
680 base_offset - addon->chunk_offsets[d], max_distance, sr);
681 }
682 }
683
LookupAllCompoundDictionaryMatches(const CompoundDictionary * addon,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const size_t cur_ix,size_t min_length,const size_t max_length,const size_t max_ring_buffer_distance,const size_t max_distance,BackwardMatch * matches,size_t match_limit)684 static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
685 const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
686 const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
687 const size_t max_length, const size_t max_ring_buffer_distance,
688 const size_t max_distance, BackwardMatch* matches,
689 size_t match_limit) {
690 size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
691 size_t d;
692 size_t total_found = 0;
693 for (d = 0; d < addon->num_chunks; ++d) {
694 /* Only one prepared dictionary type is currently supported. */
695 total_found += FindAllCompoundDictionaryMatches(
696 (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
697 cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
698 max_distance, matches + total_found, match_limit - total_found);
699 if (total_found == match_limit) break;
700 if (total_found > 0) {
701 min_length = BackwardMatchLength(&matches[total_found - 1]);
702 }
703 }
704 return total_found;
705 }
706
707 #if defined(__cplusplus) || defined(c_plusplus)
708 } /* extern "C" */
709 #endif
710
711 #endif /* BROTLI_ENC_HASH_H_ */
712