xref: /aosp_15_r20/external/brotli/c/enc/hash_rolling_inc.h (revision f4ee7fba7774faf2a30f13154332c0a06550dbc4)
1*f4ee7fbaSAndroid Build Coastguard Worker /* NOLINT(build/header_guard) */
2*f4ee7fbaSAndroid Build Coastguard Worker /* Copyright 2018 Google Inc. All Rights Reserved.
3*f4ee7fbaSAndroid Build Coastguard Worker 
4*f4ee7fbaSAndroid Build Coastguard Worker    Distributed under MIT license.
5*f4ee7fbaSAndroid Build Coastguard Worker    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*f4ee7fbaSAndroid Build Coastguard Worker */
7*f4ee7fbaSAndroid Build Coastguard Worker 
8*f4ee7fbaSAndroid Build Coastguard Worker /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
9*f4ee7fbaSAndroid Build Coastguard Worker /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
10*f4ee7fbaSAndroid Build Coastguard Worker /* JUMP = skip bytes for speedup */
11*f4ee7fbaSAndroid Build Coastguard Worker 
12*f4ee7fbaSAndroid Build Coastguard Worker /* Rolling hash for long distance long string matches. Stores one position
13*f4ee7fbaSAndroid Build Coastguard Worker    per bucket, bucket key is computed over a long region. */
14*f4ee7fbaSAndroid Build Coastguard Worker 
15*f4ee7fbaSAndroid Build Coastguard Worker #define HashRolling HASHER()
16*f4ee7fbaSAndroid Build Coastguard Worker 
17*f4ee7fbaSAndroid Build Coastguard Worker static const uint32_t FN(kRollingHashMul32) = 69069;
18*f4ee7fbaSAndroid Build Coastguard Worker static const uint32_t FN(kInvalidPos) = 0xffffffff;
19*f4ee7fbaSAndroid Build Coastguard Worker 
20*f4ee7fbaSAndroid Build Coastguard Worker /* This hasher uses a longer forward length, but returning a higher value here
21*f4ee7fbaSAndroid Build Coastguard Worker    will hurt compression by the main hasher when combined with a composite
22*f4ee7fbaSAndroid Build Coastguard Worker    hasher. The hasher tests for forward itself instead. */
FN(HashTypeLength)23*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)24*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
25*f4ee7fbaSAndroid Build Coastguard Worker 
26*f4ee7fbaSAndroid Build Coastguard Worker /* Computes a code from a single byte. A lookup table of 256 values could be
27*f4ee7fbaSAndroid Build Coastguard Worker    used, but simply adding 1 works about as good. */
FN(HashByte)28*f4ee7fbaSAndroid Build Coastguard Worker static uint32_t FN(HashByte)(uint8_t byte) {
29*f4ee7fbaSAndroid Build Coastguard Worker   return (uint32_t)byte + 1u;
30*f4ee7fbaSAndroid Build Coastguard Worker }
31*f4ee7fbaSAndroid Build Coastguard Worker 
FN(HashRollingFunctionInitial)32*f4ee7fbaSAndroid Build Coastguard Worker static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
33*f4ee7fbaSAndroid Build Coastguard Worker                                                uint32_t factor) {
34*f4ee7fbaSAndroid Build Coastguard Worker   return (uint32_t)(factor * state + FN(HashByte)(add));
35*f4ee7fbaSAndroid Build Coastguard Worker }
36*f4ee7fbaSAndroid Build Coastguard Worker 
FN(HashRollingFunction)37*f4ee7fbaSAndroid Build Coastguard Worker static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
38*f4ee7fbaSAndroid Build Coastguard Worker                                         uint8_t rem, uint32_t factor,
39*f4ee7fbaSAndroid Build Coastguard Worker                                         uint32_t factor_remove) {
40*f4ee7fbaSAndroid Build Coastguard Worker   return (uint32_t)(factor * state +
41*f4ee7fbaSAndroid Build Coastguard Worker       FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
42*f4ee7fbaSAndroid Build Coastguard Worker }
43*f4ee7fbaSAndroid Build Coastguard Worker 
44*f4ee7fbaSAndroid Build Coastguard Worker typedef struct HashRolling {
45*f4ee7fbaSAndroid Build Coastguard Worker   uint32_t state;
46*f4ee7fbaSAndroid Build Coastguard Worker   uint32_t* table;
47*f4ee7fbaSAndroid Build Coastguard Worker   size_t next_ix;
48*f4ee7fbaSAndroid Build Coastguard Worker 
49*f4ee7fbaSAndroid Build Coastguard Worker   uint32_t chunk_len;
50*f4ee7fbaSAndroid Build Coastguard Worker   uint32_t factor;
51*f4ee7fbaSAndroid Build Coastguard Worker   uint32_t factor_remove;
52*f4ee7fbaSAndroid Build Coastguard Worker } HashRolling;
53*f4ee7fbaSAndroid Build Coastguard Worker 
FN(Initialize)54*f4ee7fbaSAndroid Build Coastguard Worker static void FN(Initialize)(
55*f4ee7fbaSAndroid Build Coastguard Worker     HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
56*f4ee7fbaSAndroid Build Coastguard Worker     const BrotliEncoderParams* params) {
57*f4ee7fbaSAndroid Build Coastguard Worker   size_t i;
58*f4ee7fbaSAndroid Build Coastguard Worker   self->state = 0;
59*f4ee7fbaSAndroid Build Coastguard Worker   self->next_ix = 0;
60*f4ee7fbaSAndroid Build Coastguard Worker 
61*f4ee7fbaSAndroid Build Coastguard Worker   self->factor = FN(kRollingHashMul32);
62*f4ee7fbaSAndroid Build Coastguard Worker 
63*f4ee7fbaSAndroid Build Coastguard Worker   /* Compute the factor of the oldest byte to remove: factor**steps modulo
64*f4ee7fbaSAndroid Build Coastguard Worker      0xffffffff (the multiplications rely on 32-bit overflow) */
65*f4ee7fbaSAndroid Build Coastguard Worker   self->factor_remove = 1;
66*f4ee7fbaSAndroid Build Coastguard Worker   for (i = 0; i < CHUNKLEN; i += JUMP) {
67*f4ee7fbaSAndroid Build Coastguard Worker     self->factor_remove *= self->factor;
68*f4ee7fbaSAndroid Build Coastguard Worker   }
69*f4ee7fbaSAndroid Build Coastguard Worker 
70*f4ee7fbaSAndroid Build Coastguard Worker   self->table = (uint32_t*)common->extra;
71*f4ee7fbaSAndroid Build Coastguard Worker   for (i = 0; i < NUMBUCKETS; i++) {
72*f4ee7fbaSAndroid Build Coastguard Worker     self->table[i] = FN(kInvalidPos);
73*f4ee7fbaSAndroid Build Coastguard Worker   }
74*f4ee7fbaSAndroid Build Coastguard Worker 
75*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(params);
76*f4ee7fbaSAndroid Build Coastguard Worker }
77*f4ee7fbaSAndroid Build Coastguard Worker 
FN(Prepare)78*f4ee7fbaSAndroid Build Coastguard Worker static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
79*f4ee7fbaSAndroid Build Coastguard Worker     size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
80*f4ee7fbaSAndroid Build Coastguard Worker   size_t i;
81*f4ee7fbaSAndroid Build Coastguard Worker   /* Too small size, cannot use this hasher. */
82*f4ee7fbaSAndroid Build Coastguard Worker   if (input_size < CHUNKLEN) return;
83*f4ee7fbaSAndroid Build Coastguard Worker   self->state = 0;
84*f4ee7fbaSAndroid Build Coastguard Worker   for (i = 0; i < CHUNKLEN; i += JUMP) {
85*f4ee7fbaSAndroid Build Coastguard Worker     self->state = FN(HashRollingFunctionInitial)(
86*f4ee7fbaSAndroid Build Coastguard Worker         self->state, data[i], self->factor);
87*f4ee7fbaSAndroid Build Coastguard Worker   }
88*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(one_shot);
89*f4ee7fbaSAndroid Build Coastguard Worker }
90*f4ee7fbaSAndroid Build Coastguard Worker 
FN(HashMemAllocInBytes)91*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
92*f4ee7fbaSAndroid Build Coastguard Worker     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
93*f4ee7fbaSAndroid Build Coastguard Worker     size_t input_size) {
94*f4ee7fbaSAndroid Build Coastguard Worker   return NUMBUCKETS * sizeof(uint32_t);
95*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(params);
96*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(one_shot);
97*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(input_size);
98*f4ee7fbaSAndroid Build Coastguard Worker }
99*f4ee7fbaSAndroid Build Coastguard Worker 
FN(Store)100*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
101*f4ee7fbaSAndroid Build Coastguard Worker     const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
102*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(self);
103*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(data);
104*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(mask);
105*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(ix);
106*f4ee7fbaSAndroid Build Coastguard Worker }
107*f4ee7fbaSAndroid Build Coastguard Worker 
FN(StoreRange)108*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
109*f4ee7fbaSAndroid Build Coastguard Worker     const uint8_t* BROTLI_RESTRICT data, const size_t mask,
110*f4ee7fbaSAndroid Build Coastguard Worker     const size_t ix_start, const size_t ix_end) {
111*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(self);
112*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(data);
113*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(mask);
114*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(ix_start);
115*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(ix_end);
116*f4ee7fbaSAndroid Build Coastguard Worker }
117*f4ee7fbaSAndroid Build Coastguard Worker 
FN(StitchToPreviousBlock)118*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE void FN(StitchToPreviousBlock)(
119*f4ee7fbaSAndroid Build Coastguard Worker     HashRolling* BROTLI_RESTRICT self,
120*f4ee7fbaSAndroid Build Coastguard Worker     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
121*f4ee7fbaSAndroid Build Coastguard Worker     size_t ring_buffer_mask) {
122*f4ee7fbaSAndroid Build Coastguard Worker   /* In this case we must re-initialize the hasher from scratch from the
123*f4ee7fbaSAndroid Build Coastguard Worker      current position. */
124*f4ee7fbaSAndroid Build Coastguard Worker   size_t position_masked;
125*f4ee7fbaSAndroid Build Coastguard Worker   size_t available = num_bytes;
126*f4ee7fbaSAndroid Build Coastguard Worker   if ((position & (JUMP - 1)) != 0) {
127*f4ee7fbaSAndroid Build Coastguard Worker     size_t diff = JUMP - (position & (JUMP - 1));
128*f4ee7fbaSAndroid Build Coastguard Worker     available = (diff > available) ? 0 : (available - diff);
129*f4ee7fbaSAndroid Build Coastguard Worker     position += diff;
130*f4ee7fbaSAndroid Build Coastguard Worker   }
131*f4ee7fbaSAndroid Build Coastguard Worker   position_masked = position & ring_buffer_mask;
132*f4ee7fbaSAndroid Build Coastguard Worker   /* wrapping around ringbuffer not handled. */
133*f4ee7fbaSAndroid Build Coastguard Worker   if (available > ring_buffer_mask - position_masked) {
134*f4ee7fbaSAndroid Build Coastguard Worker     available = ring_buffer_mask - position_masked;
135*f4ee7fbaSAndroid Build Coastguard Worker   }
136*f4ee7fbaSAndroid Build Coastguard Worker 
137*f4ee7fbaSAndroid Build Coastguard Worker   FN(Prepare)(self, BROTLI_FALSE, available,
138*f4ee7fbaSAndroid Build Coastguard Worker       ringbuffer + (position & ring_buffer_mask));
139*f4ee7fbaSAndroid Build Coastguard Worker   self->next_ix = position;
140*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(num_bytes);
141*f4ee7fbaSAndroid Build Coastguard Worker }
142*f4ee7fbaSAndroid Build Coastguard Worker 
FN(PrepareDistanceCache)143*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE void FN(PrepareDistanceCache)(
144*f4ee7fbaSAndroid Build Coastguard Worker     HashRolling* BROTLI_RESTRICT self,
145*f4ee7fbaSAndroid Build Coastguard Worker     int* BROTLI_RESTRICT distance_cache) {
146*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(self);
147*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(distance_cache);
148*f4ee7fbaSAndroid Build Coastguard Worker }
149*f4ee7fbaSAndroid Build Coastguard Worker 
FN(FindLongestMatch)150*f4ee7fbaSAndroid Build Coastguard Worker static BROTLI_INLINE void FN(FindLongestMatch)(
151*f4ee7fbaSAndroid Build Coastguard Worker     HashRolling* BROTLI_RESTRICT self,
152*f4ee7fbaSAndroid Build Coastguard Worker     const BrotliEncoderDictionary* dictionary,
153*f4ee7fbaSAndroid Build Coastguard Worker     const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
154*f4ee7fbaSAndroid Build Coastguard Worker     const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
155*f4ee7fbaSAndroid Build Coastguard Worker     const size_t max_length, const size_t max_backward,
156*f4ee7fbaSAndroid Build Coastguard Worker     const size_t dictionary_distance, const size_t max_distance,
157*f4ee7fbaSAndroid Build Coastguard Worker     HasherSearchResult* BROTLI_RESTRICT out) {
158*f4ee7fbaSAndroid Build Coastguard Worker   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
159*f4ee7fbaSAndroid Build Coastguard Worker   size_t pos;
160*f4ee7fbaSAndroid Build Coastguard Worker 
161*f4ee7fbaSAndroid Build Coastguard Worker   if ((cur_ix & (JUMP - 1)) != 0) return;
162*f4ee7fbaSAndroid Build Coastguard Worker 
163*f4ee7fbaSAndroid Build Coastguard Worker   /* Not enough lookahead */
164*f4ee7fbaSAndroid Build Coastguard Worker   if (max_length < CHUNKLEN) return;
165*f4ee7fbaSAndroid Build Coastguard Worker 
166*f4ee7fbaSAndroid Build Coastguard Worker   for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
167*f4ee7fbaSAndroid Build Coastguard Worker     uint32_t code = self->state & MASK;
168*f4ee7fbaSAndroid Build Coastguard Worker 
169*f4ee7fbaSAndroid Build Coastguard Worker     uint8_t rem = data[pos & ring_buffer_mask];
170*f4ee7fbaSAndroid Build Coastguard Worker     uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
171*f4ee7fbaSAndroid Build Coastguard Worker     size_t found_ix = FN(kInvalidPos);
172*f4ee7fbaSAndroid Build Coastguard Worker 
173*f4ee7fbaSAndroid Build Coastguard Worker     self->state = FN(HashRollingFunction)(
174*f4ee7fbaSAndroid Build Coastguard Worker         self->state, add, rem, self->factor, self->factor_remove);
175*f4ee7fbaSAndroid Build Coastguard Worker 
176*f4ee7fbaSAndroid Build Coastguard Worker     if (code < NUMBUCKETS) {
177*f4ee7fbaSAndroid Build Coastguard Worker       found_ix = self->table[code];
178*f4ee7fbaSAndroid Build Coastguard Worker       self->table[code] = (uint32_t)pos;
179*f4ee7fbaSAndroid Build Coastguard Worker       if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
180*f4ee7fbaSAndroid Build Coastguard Worker         /* The cast to 32-bit makes backward distances up to 4GB work even
181*f4ee7fbaSAndroid Build Coastguard Worker            if cur_ix is above 4GB, despite using 32-bit values in the table. */
182*f4ee7fbaSAndroid Build Coastguard Worker         size_t backward = (uint32_t)(cur_ix - found_ix);
183*f4ee7fbaSAndroid Build Coastguard Worker         if (backward <= max_backward) {
184*f4ee7fbaSAndroid Build Coastguard Worker           const size_t found_ix_masked = found_ix & ring_buffer_mask;
185*f4ee7fbaSAndroid Build Coastguard Worker           const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
186*f4ee7fbaSAndroid Build Coastguard Worker                                                       &data[cur_ix_masked],
187*f4ee7fbaSAndroid Build Coastguard Worker                                                       max_length);
188*f4ee7fbaSAndroid Build Coastguard Worker           if (len >= 4 && len > out->len) {
189*f4ee7fbaSAndroid Build Coastguard Worker             score_t score = BackwardReferenceScore(len, backward);
190*f4ee7fbaSAndroid Build Coastguard Worker             if (score > out->score) {
191*f4ee7fbaSAndroid Build Coastguard Worker               out->len = len;
192*f4ee7fbaSAndroid Build Coastguard Worker               out->distance = backward;
193*f4ee7fbaSAndroid Build Coastguard Worker               out->score = score;
194*f4ee7fbaSAndroid Build Coastguard Worker               out->len_code_delta = 0;
195*f4ee7fbaSAndroid Build Coastguard Worker             }
196*f4ee7fbaSAndroid Build Coastguard Worker           }
197*f4ee7fbaSAndroid Build Coastguard Worker         }
198*f4ee7fbaSAndroid Build Coastguard Worker       }
199*f4ee7fbaSAndroid Build Coastguard Worker     }
200*f4ee7fbaSAndroid Build Coastguard Worker   }
201*f4ee7fbaSAndroid Build Coastguard Worker 
202*f4ee7fbaSAndroid Build Coastguard Worker   self->next_ix = cur_ix + JUMP;
203*f4ee7fbaSAndroid Build Coastguard Worker 
204*f4ee7fbaSAndroid Build Coastguard Worker   /* NOTE: this hasher does not search in the dictionary. It is used as
205*f4ee7fbaSAndroid Build Coastguard Worker      backup-hasher, the main hasher already searches in it. */
206*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(dictionary);
207*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(distance_cache);
208*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(dictionary_distance);
209*f4ee7fbaSAndroid Build Coastguard Worker   BROTLI_UNUSED(max_distance);
210*f4ee7fbaSAndroid Build Coastguard Worker }
211*f4ee7fbaSAndroid Build Coastguard Worker 
212*f4ee7fbaSAndroid Build Coastguard Worker #undef HashRolling
213