xref: /aosp_15_r20/external/cronet/third_party/brotli/enc/hash.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /* Copyright 2010 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* A (forgetful) hash table to the data seen by the compressor, to
8    help create backward references to previous data. */
9 
10 #ifndef BROTLI_ENC_HASH_H_
11 #define BROTLI_ENC_HASH_H_
12 
13 #include <stdlib.h>  /* exit */
14 #include <string.h>  /* memcmp, memset */
15 
16 #include "../common/constants.h"
17 #include "../common/dictionary.h"
18 #include "../common/platform.h"
19 #include <brotli/types.h>
20 #include "encoder_dict.h"
21 #include "fast_log.h"
22 #include "find_match_length.h"
23 #include "memory.h"
24 #include "quality.h"
25 #include "static_dict.h"
26 
27 #if defined(__cplusplus) || defined(c_plusplus)
28 extern "C" {
29 #endif
30 
31 typedef struct {
32   /**
33    * Dynamically allocated areas; regular hasher uses one or two allocations;
34    * "composite" hasher uses up to 4 allocations.
35    */
36   void* extra[4];
37 
38   /**
39    * False before the fisrt invocation of HasherSetup (where "extra" memory)
40    * is allocated.
41    */
42   BROTLI_BOOL is_setup_;
43 
44   size_t dict_num_lookups;
45   size_t dict_num_matches;
46 
47   BrotliHasherParams params;
48 
49   /**
50    * False if hasher needs to be "prepared" before use (before the first
51    * invocation of HasherSetup or after HasherReset). "preparation" is hasher
52    * data initialization (using input ringbuffer).
53    */
54   BROTLI_BOOL is_prepared_;
55 } HasherCommon;
56 
57 #define score_t size_t
58 
59 static const uint32_t kCutoffTransformsCount = 10;
60 /*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
61 /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
62 static const uint64_t kCutoffTransforms =
63     BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
64 
65 typedef struct HasherSearchResult {
66   size_t len;
67   size_t distance;
68   score_t score;
69   int len_code_delta; /* == len_code - len */
70 } HasherSearchResult;
71 
72 /* kHashMul32 multiplier has these properties:
73    * The multiplier must be odd. Otherwise we may lose the highest bit.
74    * No long streaks of ones or zeros.
75    * There is no effort to ensure that it is a prime, the oddity is enough
76      for this use.
77    * The number has been tuned heuristically against compression benchmarks. */
78 static const uint32_t kHashMul32 = 0x1E35A7BD;
79 static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
80 static const uint64_t kHashMul64Long =
81     BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
82 
Hash14(const uint8_t * data)83 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
84   uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
85   /* The higher bits contain more mixture from the multiplication,
86      so we take our results from there. */
87   return h >> (32 - 14);
88 }
89 
PrepareDistanceCache(int * BROTLI_RESTRICT distance_cache,const int num_distances)90 static BROTLI_INLINE void PrepareDistanceCache(
91     int* BROTLI_RESTRICT distance_cache, const int num_distances) {
92   if (num_distances > 4) {
93     int last_distance = distance_cache[0];
94     distance_cache[4] = last_distance - 1;
95     distance_cache[5] = last_distance + 1;
96     distance_cache[6] = last_distance - 2;
97     distance_cache[7] = last_distance + 2;
98     distance_cache[8] = last_distance - 3;
99     distance_cache[9] = last_distance + 3;
100     if (num_distances > 10) {
101       int next_last_distance = distance_cache[1];
102       distance_cache[10] = next_last_distance - 1;
103       distance_cache[11] = next_last_distance + 1;
104       distance_cache[12] = next_last_distance - 2;
105       distance_cache[13] = next_last_distance + 2;
106       distance_cache[14] = next_last_distance - 3;
107       distance_cache[15] = next_last_distance + 3;
108     }
109   }
110 }
111 
112 #define BROTLI_LITERAL_BYTE_SCORE 135
113 #define BROTLI_DISTANCE_BIT_PENALTY 30
114 /* Score must be positive after applying maximal penalty. */
115 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
116 
117 /* Usually, we always choose the longest backward reference. This function
118    allows for the exception of that rule.
119 
120    If we choose a backward reference that is further away, it will
121    usually be coded with more bits. We approximate this by assuming
122    log2(distance). If the distance can be expressed in terms of the
123    last four distances, we use some heuristic constants to estimate
124    the bits cost. For the first up to four literals we use the bit
125    cost of the literals from the literal cost model, after that we
126    use the average bit cost of the cost model.
127 
128    This function is used to sometimes discard a longer backward reference
129    when it is not much longer and the bit cost for encoding it is more
130    than the saved literals.
131 
132    backward_reference_offset MUST be positive. */
BackwardReferenceScore(size_t copy_length,size_t backward_reference_offset)133 static BROTLI_INLINE score_t BackwardReferenceScore(
134     size_t copy_length, size_t backward_reference_offset) {
135   return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
136       BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
137 }
138 
BackwardReferenceScoreUsingLastDistance(size_t copy_length)139 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
140     size_t copy_length) {
141   return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
142       BROTLI_SCORE_BASE + 15;
143 }
144 
BackwardReferencePenaltyUsingLastDistance(size_t distance_short_code)145 static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
146     size_t distance_short_code) {
147   return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
148 }
149 
TestStaticDictionaryItem(const BrotliEncoderDictionary * dictionary,size_t len,size_t word_idx,const uint8_t * data,size_t max_length,size_t max_backward,size_t max_distance,HasherSearchResult * out)150 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
151     const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
152     const uint8_t* data, size_t max_length, size_t max_backward,
153     size_t max_distance, HasherSearchResult* out) {
154   size_t offset;
155   size_t matchlen;
156   size_t backward;
157   score_t score;
158   offset = dictionary->words->offsets_by_length[len] + len * word_idx;
159   if (len > max_length) {
160     return BROTLI_FALSE;
161   }
162 
163   matchlen =
164       FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
165   if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
166     return BROTLI_FALSE;
167   }
168   {
169     size_t cut = len - matchlen;
170     size_t transform_id = (cut << 2) +
171         (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
172     backward = max_backward + 1 + word_idx +
173         (transform_id << dictionary->words->size_bits_by_length[len]);
174   }
175   if (backward > max_distance) {
176     return BROTLI_FALSE;
177   }
178   score = BackwardReferenceScore(matchlen, backward);
179   if (score < out->score) {
180     return BROTLI_FALSE;
181   }
182   out->len = matchlen;
183   out->len_code_delta = (int)len - (int)matchlen;
184   out->distance = backward;
185   out->score = score;
186   return BROTLI_TRUE;
187 }
188 
SearchInStaticDictionary(const BrotliEncoderDictionary * dictionary,HasherCommon * common,const uint8_t * data,size_t max_length,size_t max_backward,size_t max_distance,HasherSearchResult * out,BROTLI_BOOL shallow)189 static BROTLI_INLINE void SearchInStaticDictionary(
190     const BrotliEncoderDictionary* dictionary,
191     HasherCommon* common, const uint8_t* data, size_t max_length,
192     size_t max_backward, size_t max_distance,
193     HasherSearchResult* out, BROTLI_BOOL shallow) {
194   size_t key;
195   size_t i;
196   if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {
197     return;
198   }
199   key = Hash14(data) << 1;
200   for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
201     common->dict_num_lookups++;
202     if (dictionary->hash_table_lengths[key] != 0) {
203       BROTLI_BOOL item_matches = TestStaticDictionaryItem(
204           dictionary, dictionary->hash_table_lengths[key],
205           dictionary->hash_table_words[key], data,
206           max_length, max_backward, max_distance, out);
207       if (item_matches) {
208         common->dict_num_matches++;
209       }
210     }
211   }
212 }
213 
214 typedef struct BackwardMatch {
215   uint32_t distance;
216   uint32_t length_and_code;
217 } BackwardMatch;
218 
InitBackwardMatch(BackwardMatch * self,size_t dist,size_t len)219 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
220     size_t dist, size_t len) {
221   self->distance = (uint32_t)dist;
222   self->length_and_code = (uint32_t)(len << 5);
223 }
224 
InitDictionaryBackwardMatch(BackwardMatch * self,size_t dist,size_t len,size_t len_code)225 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
226     size_t dist, size_t len, size_t len_code) {
227   self->distance = (uint32_t)dist;
228   self->length_and_code =
229       (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
230 }
231 
BackwardMatchLength(const BackwardMatch * self)232 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
233   return self->length_and_code >> 5;
234 }
235 
BackwardMatchLengthCode(const BackwardMatch * self)236 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
237   size_t code = self->length_and_code & 31;
238   return code ? code : BackwardMatchLength(self);
239 }
240 
241 #define EXPAND_CAT(a, b) CAT(a, b)
242 #define CAT(a, b) a ## b
243 #define FN(X) EXPAND_CAT(X, HASHER())
244 
245 #define HASHER() H10
246 #define BUCKET_BITS 17
247 #define MAX_TREE_SEARCH_DEPTH 64
248 #define MAX_TREE_COMP_LENGTH 128
249 #include "hash_to_binary_tree_inc.h"  /* NOLINT(build/include) */
250 #undef MAX_TREE_SEARCH_DEPTH
251 #undef MAX_TREE_COMP_LENGTH
252 #undef BUCKET_BITS
253 #undef HASHER
254 /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
255 #define MAX_NUM_MATCHES_H10 128
256 
257 /* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
258    a little faster (0.5% - 1%) and it compresses 0.15% better on small text
259    and HTML inputs. */
260 
261 #define HASHER() H2
262 #define BUCKET_BITS 16
263 #define BUCKET_SWEEP_BITS 0
264 #define HASH_LEN 5
265 #define USE_DICTIONARY 1
266 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
267 #undef BUCKET_SWEEP_BITS
268 #undef USE_DICTIONARY
269 #undef HASHER
270 
271 #define HASHER() H3
272 #define BUCKET_SWEEP_BITS 1
273 #define USE_DICTIONARY 0
274 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
275 #undef USE_DICTIONARY
276 #undef BUCKET_SWEEP_BITS
277 #undef BUCKET_BITS
278 #undef HASHER
279 
280 #define HASHER() H4
281 #define BUCKET_BITS 17
282 #define BUCKET_SWEEP_BITS 2
283 #define USE_DICTIONARY 1
284 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
285 #undef USE_DICTIONARY
286 #undef HASH_LEN
287 #undef BUCKET_SWEEP_BITS
288 #undef BUCKET_BITS
289 #undef HASHER
290 
291 #define HASHER() H5
292 #include "hash_longest_match_inc.h"  /* NOLINT(build/include) */
293 #undef HASHER
294 
295 #define HASHER() H6
296 #include "hash_longest_match64_inc.h"  /* NOLINT(build/include) */
297 #undef HASHER
298 
299 #define BUCKET_BITS 15
300 
301 #define NUM_LAST_DISTANCES_TO_CHECK 4
302 #define NUM_BANKS 1
303 #define BANK_BITS 16
304 #define HASHER() H40
305 #include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
306 #undef HASHER
307 #undef NUM_LAST_DISTANCES_TO_CHECK
308 
309 #define NUM_LAST_DISTANCES_TO_CHECK 10
310 #define HASHER() H41
311 #include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
312 #undef HASHER
313 #undef NUM_LAST_DISTANCES_TO_CHECK
314 #undef NUM_BANKS
315 #undef BANK_BITS
316 
317 #define NUM_LAST_DISTANCES_TO_CHECK 16
318 #define NUM_BANKS 512
319 #define BANK_BITS 9
320 #define HASHER() H42
321 #include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
322 #undef HASHER
323 #undef NUM_LAST_DISTANCES_TO_CHECK
324 #undef NUM_BANKS
325 #undef BANK_BITS
326 
327 #undef BUCKET_BITS
328 
329 #define HASHER() H54
330 #define BUCKET_BITS 20
331 #define BUCKET_SWEEP_BITS 2
332 #define HASH_LEN 7
333 #define USE_DICTIONARY 0
334 #include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
335 #undef USE_DICTIONARY
336 #undef HASH_LEN
337 #undef BUCKET_SWEEP_BITS
338 #undef BUCKET_BITS
339 #undef HASHER
340 
341 /* fast large window hashers */
342 
343 #define HASHER() HROLLING_FAST
344 #define CHUNKLEN 32
345 #define JUMP 4
346 #define NUMBUCKETS 16777216
347 #define MASK ((NUMBUCKETS * 64) - 1)
348 #include "hash_rolling_inc.h"  /* NOLINT(build/include) */
349 #undef JUMP
350 #undef HASHER
351 
352 
353 #define HASHER() HROLLING
354 #define JUMP 1
355 #include "hash_rolling_inc.h"  /* NOLINT(build/include) */
356 #undef MASK
357 #undef NUMBUCKETS
358 #undef JUMP
359 #undef CHUNKLEN
360 #undef HASHER
361 
362 #define HASHER() H35
363 #define HASHER_A H3
364 #define HASHER_B HROLLING_FAST
365 #include "hash_composite_inc.h"  /* NOLINT(build/include) */
366 #undef HASHER_A
367 #undef HASHER_B
368 #undef HASHER
369 
370 #define HASHER() H55
371 #define HASHER_A H54
372 #define HASHER_B HROLLING_FAST
373 #include "hash_composite_inc.h"  /* NOLINT(build/include) */
374 #undef HASHER_A
375 #undef HASHER_B
376 #undef HASHER
377 
378 #define HASHER() H65
379 #define HASHER_A H6
380 #define HASHER_B HROLLING
381 #include "hash_composite_inc.h"  /* NOLINT(build/include) */
382 #undef HASHER_A
383 #undef HASHER_B
384 #undef HASHER
385 
386 #undef FN
387 #undef CAT
388 #undef EXPAND_CAT
389 
390 #define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
391 #define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
392 #define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
393 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
394 
395 typedef struct {
396   HasherCommon common;
397 
398   union {
399 #define MEMBER_(N) \
400     H ## N _H ## N;
401     FOR_ALL_HASHERS(MEMBER_)
402 #undef MEMBER_
403   } privat;
404 } Hasher;
405 
406 /* MUST be invoked before any other method. */
HasherInit(Hasher * hasher)407 static BROTLI_INLINE void HasherInit(Hasher* hasher) {
408   hasher->common.is_setup_ = BROTLI_FALSE;
409   hasher->common.extra[0] = NULL;
410   hasher->common.extra[1] = NULL;
411   hasher->common.extra[2] = NULL;
412   hasher->common.extra[3] = NULL;
413 }
414 
DestroyHasher(MemoryManager * m,Hasher * hasher)415 static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
416   if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
417   if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
418   if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
419   if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
420 }
421 
HasherReset(Hasher * hasher)422 static BROTLI_INLINE void HasherReset(Hasher* hasher) {
423   hasher->common.is_prepared_ = BROTLI_FALSE;
424 }
425 
HasherSize(const BrotliEncoderParams * params,BROTLI_BOOL one_shot,const size_t input_size,size_t * alloc_size)426 static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
427     BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {
428   switch (params->hasher.type) {
429 #define SIZE_(N)                                                           \
430     case N:                                                                \
431       HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
432       break;
433     FOR_ALL_HASHERS(SIZE_)
434 #undef SIZE_
435     default:
436       break;
437   }
438 }
439 
HasherSetup(MemoryManager * m,Hasher * hasher,BrotliEncoderParams * params,const uint8_t * data,size_t position,size_t input_size,BROTLI_BOOL is_last)440 static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
441     BrotliEncoderParams* params, const uint8_t* data, size_t position,
442     size_t input_size, BROTLI_BOOL is_last) {
443   BROTLI_BOOL one_shot = (position == 0 && is_last);
444   if (!hasher->common.is_setup_) {
445     size_t alloc_size[4] = {0};
446     size_t i;
447     ChooseHasher(params, &params->hasher);
448     hasher->common.params = params->hasher;
449     hasher->common.dict_num_lookups = 0;
450     hasher->common.dict_num_matches = 0;
451     HasherSize(params, one_shot, input_size, alloc_size);
452     for (i = 0; i < 4; ++i) {
453       if (alloc_size[i] == 0) continue;
454       hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
455       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
456     }
457     switch (hasher->common.params.type) {
458 #define INITIALIZE_(N)                        \
459       case N:                                 \
460         InitializeH ## N(&hasher->common,     \
461             &hasher->privat._H ## N, params); \
462         break;
463       FOR_ALL_HASHERS(INITIALIZE_);
464 #undef INITIALIZE_
465       default:
466         break;
467     }
468     HasherReset(hasher);
469     hasher->common.is_setup_ = BROTLI_TRUE;
470   }
471 
472   if (!hasher->common.is_prepared_) {
473     switch (hasher->common.params.type) {
474 #define PREPARE_(N)                      \
475       case N:                            \
476         PrepareH ## N(                   \
477             &hasher->privat._H ## N,     \
478             one_shot, input_size, data); \
479         break;
480       FOR_ALL_HASHERS(PREPARE_)
481 #undef PREPARE_
482       default: break;
483     }
484     hasher->common.is_prepared_ = BROTLI_TRUE;
485   }
486 }
487 
InitOrStitchToPreviousBlock(MemoryManager * m,Hasher * hasher,const uint8_t * data,size_t mask,BrotliEncoderParams * params,size_t position,size_t input_size,BROTLI_BOOL is_last)488 static BROTLI_INLINE void InitOrStitchToPreviousBlock(
489     MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
490     BrotliEncoderParams* params, size_t position, size_t input_size,
491     BROTLI_BOOL is_last) {
492   HasherSetup(m, hasher, params, data, position, input_size, is_last);
493   if (BROTLI_IS_OOM(m)) return;
494   switch (hasher->common.params.type) {
495 #define INIT_(N)                             \
496     case N:                                  \
497       StitchToPreviousBlockH ## N(           \
498           &hasher->privat._H ## N,           \
499           input_size, position, data, mask); \
500     break;
501     FOR_ALL_HASHERS(INIT_)
502 #undef INIT_
503     default: break;
504   }
505 }
506 
507 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
508        to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
FindCompoundDictionaryMatch(const PreparedDictionary * self,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const int * BROTLI_RESTRICT distance_cache,const size_t cur_ix,const size_t max_length,const size_t distance_offset,const size_t max_distance,HasherSearchResult * BROTLI_RESTRICT out)509 static BROTLI_INLINE void FindCompoundDictionaryMatch(
510     const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
511     const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
512     const size_t cur_ix, const size_t max_length, const size_t distance_offset,
513     const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
514   const uint32_t source_offset = self->source_offset;
515   const uint32_t source_size = self->source_size;
516   const size_t boundary = distance_offset - source_size;
517   const uint32_t hash_bits = self->hash_bits;
518   const uint32_t bucket_bits = self->bucket_bits;
519   const uint32_t slot_bits = self->slot_bits;
520 
521   const uint32_t hash_shift = 64u - bucket_bits;
522   const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
523   const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
524 
525   const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526   const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527   const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
528   const uint8_t* source = (uint8_t*)(&items[source_offset]);
529 
530   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
531   score_t best_score = out->score;
532   size_t best_len = out->len;
533   size_t i;
534   const uint64_t h =
535       (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
536       kPreparedDictionaryHashMul64Long;
537   const uint32_t key = (uint32_t)(h >> hash_shift);
538   const uint32_t slot = key & slot_mask;
539   const uint32_t head = heads[key];
540   const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
541   uint32_t item = (head == 0xFFFF) ? 1 : 0;
542   for (i = 0; i < 4; ++i) {
543     const size_t distance = (size_t)distance_cache[i];
544     size_t offset;
545     size_t limit;
546     size_t len;
547     if (distance <= boundary || distance > distance_offset) continue;
548     offset = distance_offset - distance;
549     limit = source_size - offset;
550     limit = limit > max_length ? max_length : limit;
551     len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
552                                    limit);
553     if (len >= 2) {
554       score_t score = BackwardReferenceScoreUsingLastDistance(len);
555       if (best_score < score) {
556         if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
557         if (best_score < score) {
558           best_score = score;
559           if (len > best_len) best_len = len;
560           out->len = len;
561           out->len_code_delta = 0;
562           out->distance = distance;
563           out->score = best_score;
564         }
565       }
566     }
567   }
568   while (item == 0) {
569     size_t offset;
570     size_t distance;
571     size_t limit;
572     item = *chain;
573     chain++;
574     offset = item & 0x7FFFFFFF;
575     item &= 0x80000000;
576     distance = distance_offset - offset;
577     limit = source_size - offset;
578     limit = (limit > max_length) ? max_length : limit;
579     if (distance > max_distance) continue;
580     if (cur_ix_masked + best_len > ring_buffer_mask ||
581         best_len >= limit ||
582         data[cur_ix_masked + best_len] != source[offset + best_len]) {
583       continue;
584     }
585     {
586       const size_t len = FindMatchLengthWithLimit(&source[offset],
587                                                   &data[cur_ix_masked],
588                                                   limit);
589       if (len >= 4) {
590         score_t score = BackwardReferenceScore(len, distance);
591         if (best_score < score) {
592           best_score = score;
593           best_len = len;
594           out->len = best_len;
595           out->len_code_delta = 0;
596           out->distance = distance;
597           out->score = best_score;
598         }
599       }
600     }
601   }
602 }
603 
604 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
605        to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
FindAllCompoundDictionaryMatches(const PreparedDictionary * self,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const size_t cur_ix,const size_t min_length,const size_t max_length,const size_t distance_offset,const size_t max_distance,BackwardMatch * matches,size_t match_limit)606 static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
607     const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
608     const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
609     const size_t max_length, const size_t distance_offset,
610     const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
611   const uint32_t source_offset = self->source_offset;
612   const uint32_t source_size = self->source_size;
613   const uint32_t hash_bits = self->hash_bits;
614   const uint32_t bucket_bits = self->bucket_bits;
615   const uint32_t slot_bits = self->slot_bits;
616 
617   const uint32_t hash_shift = 64u - bucket_bits;
618   const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
619   const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
620 
621   const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
622   const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
623   const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
624   const uint8_t* source = (uint8_t*)(&items[source_offset]);
625 
626   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
627   size_t best_len = min_length;
628   const uint64_t h =
629       (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
630       kPreparedDictionaryHashMul64Long;
631   const uint32_t key = (uint32_t)(h >> hash_shift);
632   const uint32_t slot = key & slot_mask;
633   const uint32_t head = heads[key];
634   const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
635   uint32_t item = (head == 0xFFFF) ? 1 : 0;
636   size_t found = 0;
637   while (item == 0) {
638     size_t offset;
639     size_t distance;
640     size_t limit;
641     size_t len;
642     item = *chain;
643     chain++;
644     offset = item & 0x7FFFFFFF;
645     item &= 0x80000000;
646     distance = distance_offset - offset;
647     limit = source_size - offset;
648     limit = (limit > max_length) ? max_length : limit;
649     if (distance > max_distance) continue;
650     if (cur_ix_masked + best_len > ring_buffer_mask ||
651         best_len >= limit ||
652         data[cur_ix_masked + best_len] != source[offset + best_len]) {
653       continue;
654     }
655     len = FindMatchLengthWithLimit(
656         &source[offset], &data[cur_ix_masked], limit);
657     if (len > best_len) {
658       best_len = len;
659       InitBackwardMatch(matches++, distance, len);
660       found++;
661       if (found == match_limit) break;
662     }
663   }
664   return found;
665 }
666 
LookupCompoundDictionaryMatch(const CompoundDictionary * addon,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const int * BROTLI_RESTRICT distance_cache,const size_t cur_ix,const size_t max_length,const size_t max_ring_buffer_distance,const size_t max_distance,HasherSearchResult * sr)667 static BROTLI_INLINE void LookupCompoundDictionaryMatch(
668     const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
669     const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
670     const size_t cur_ix, const size_t max_length,
671     const size_t max_ring_buffer_distance, const size_t max_distance,
672     HasherSearchResult* sr) {
673   size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
674   size_t d;
675   for (d = 0; d < addon->num_chunks; ++d) {
676     /* Only one prepared dictionary type is currently supported. */
677     FindCompoundDictionaryMatch(
678         (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
679         distance_cache, cur_ix, max_length,
680         base_offset - addon->chunk_offsets[d], max_distance, sr);
681   }
682 }
683 
LookupAllCompoundDictionaryMatches(const CompoundDictionary * addon,const uint8_t * BROTLI_RESTRICT data,const size_t ring_buffer_mask,const size_t cur_ix,size_t min_length,const size_t max_length,const size_t max_ring_buffer_distance,const size_t max_distance,BackwardMatch * matches,size_t match_limit)684 static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
685     const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
686     const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
687     const size_t max_length, const size_t max_ring_buffer_distance,
688     const size_t max_distance, BackwardMatch* matches,
689     size_t match_limit) {
690   size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
691   size_t d;
692   size_t total_found = 0;
693   for (d = 0; d < addon->num_chunks; ++d) {
694     /* Only one prepared dictionary type is currently supported. */
695     total_found += FindAllCompoundDictionaryMatches(
696         (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
697         cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
698         max_distance, matches + total_found, match_limit - total_found);
699     if (total_found == match_limit) break;
700     if (total_found > 0) {
701       min_length = BackwardMatchLength(&matches[total_found - 1]);
702     }
703   }
704   return total_found;
705 }
706 
707 #if defined(__cplusplus) || defined(c_plusplus)
708 }  /* extern "C" */
709 #endif
710 
711 #endif  /* BROTLI_ENC_HASH_H_ */
712