1 /* match_tpl.h -- find longest match template for compare256 variants
2  *
3  * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
4  * For conditions of distribution and use, see copyright notice in zlib.h
5  *
6  * Portions copyright (C) 2014-2021 Konstantin Nosov
7  *  Fast-zlib optimized longest_match
8  *  https://github.com/gildor2/fast_zlib
9  */
10 
11 #include "zbuild.h"
12 #include "deflate.h"
13 #include "functable.h"
14 
15 #ifndef MATCH_TPL_H
16 #define MATCH_TPL_H
17 
18 #define EARLY_EXIT_TRIGGER_LEVEL 5
19 
20 #endif
21 
22 /* Set match_start to the longest match starting at the given string and
23  * return its length. Matches shorter or equal to prev_length are discarded,
24  * in which case the result is equal to prev_length and match_start is garbage.
25  *
26  * IN assertions: cur_match is the head of the hash chain for the current
27  * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
28  * OUT assertion: the match length is not greater than s->lookahead
29  */
LONGEST_MATCH(deflate_state * const s,Pos cur_match)30 Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
31     unsigned int strstart = s->strstart;
32     const unsigned wmask = s->w_mask;
33     unsigned char *window = s->window;
34     unsigned char *scan = window + strstart;
35     Z_REGISTER unsigned char *mbase_start = window;
36     Z_REGISTER unsigned char *mbase_end;
37     const Pos *prev = s->prev;
38     Pos limit;
39 #ifdef LONGEST_MATCH_SLOW
40     Pos limit_base;
41 #else
42     int32_t early_exit;
43 #endif
44     uint32_t chain_length, nice_match, best_len, offset;
45     uint32_t lookahead = s->lookahead;
46     Pos match_offset = 0;
47 #ifdef UNALIGNED_OK
48     uint8_t scan_start[8];
49 #endif
50     uint8_t scan_end[8];
51 
52 #define GOTO_NEXT_CHAIN \
53     if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
54         continue; \
55     return best_len;
56 
57     /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
58     Assert(STD_MAX_MATCH == 258, "Code too clever");
59 
60     best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61 
62     /* Calculate read offset which should only extend an extra byte
63      * to find the next best match length.
64      */
65     offset = best_len-1;
66 #ifdef UNALIGNED_OK
67     if (best_len >= sizeof(uint32_t)) {
68         offset -= 2;
69 #ifdef UNALIGNED64_OK
70         if (best_len >= sizeof(uint64_t))
71             offset -= 4;
72 #endif
73     }
74 #endif
75 
76 #ifdef UNALIGNED64_OK
77     zmemcpy_8(scan_start, scan);
78     zmemcpy_8(scan_end, scan+offset);
79 #elif defined(UNALIGNED_OK)
80     zmemcpy_4(scan_start, scan);
81     zmemcpy_4(scan_end, scan+offset);
82 #else
83     scan_end[0] = *(scan+offset);
84     scan_end[1] = *(scan+offset+1);
85 #endif
86     mbase_end  = (mbase_start+offset);
87 
88     /* Do not waste too much time if we already have a good match */
89     chain_length = s->max_chain_length;
90     if (best_len >= s->good_match)
91         chain_length >>= 2;
92     nice_match = (uint32_t)s->nice_match;
93 
94     /* Stop when cur_match becomes <= limit. To simplify the code,
95      * we prevent matches with the string of window index 0
96      */
97     limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
98 #ifdef LONGEST_MATCH_SLOW
99     limit_base = limit;
100     if (best_len >= STD_MIN_MATCH) {
101         /* We're continuing search (lazy evaluation). */
102         uint32_t i, hash;
103         Pos pos;
104 
105         /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
106          * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
107          * these strings are not yet inserted into the hash table.
108          */
109         hash = s->update_hash(s, 0, scan[1]);
110         hash = s->update_hash(s, hash, scan[2]);
111 
112         for (i = 3; i <= best_len; i++) {
113             hash = s->update_hash(s, hash, scan[i]);
114 
115             /* If we're starting with best_len >= 3, we can use offset search. */
116             pos = s->head[hash];
117             if (pos < cur_match) {
118                 match_offset = (Pos)(i - 2);
119                 cur_match = pos;
120             }
121         }
122 
123         /* Update offset-dependent variables */
124         limit = limit_base+match_offset;
125         if (cur_match <= limit)
126             goto break_matching;
127         mbase_start -= match_offset;
128         mbase_end -= match_offset;
129     }
130 #else
131     early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
132 #endif
133     Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
134     for (;;) {
135         if (cur_match >= strstart)
136             break;
137 
138         /* Skip to next match if the match length cannot increase or if the match length is
139          * less than 2. Note that the checks below for insufficient lookahead only occur
140          * occasionally for performance reasons.
141          * Therefore uninitialized memory will be accessed and conditional jumps will be made
142          * that depend on those values. However the length of the match is limited to the
143          * lookahead, so the output of deflate is not affected by the uninitialized values.
144          */
145 #ifdef UNALIGNED_OK
146         if (best_len < sizeof(uint32_t)) {
147             for (;;) {
148                 if (zmemcmp_2(mbase_end+cur_match, scan_end) == 0 &&
149                     zmemcmp_2(mbase_start+cur_match, scan_start) == 0)
150                     break;
151                 GOTO_NEXT_CHAIN;
152             }
153 #  ifdef UNALIGNED64_OK
154         } else if (best_len >= sizeof(uint64_t)) {
155             for (;;) {
156                 if (zmemcmp_8(mbase_end+cur_match, scan_end) == 0 &&
157                     zmemcmp_8(mbase_start+cur_match, scan_start) == 0)
158                     break;
159                 GOTO_NEXT_CHAIN;
160             }
161 #  endif
162         } else {
163             for (;;) {
164                 if (zmemcmp_4(mbase_end+cur_match, scan_end) == 0 &&
165                     zmemcmp_4(mbase_start+cur_match, scan_start) == 0)
166                     break;
167                 GOTO_NEXT_CHAIN;
168             }
169         }
170 #else
171         for (;;) {
172             if (mbase_end[cur_match] == scan_end[0] && mbase_end[cur_match+1] == scan_end[1] &&
173                 mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1])
174                 break;
175             GOTO_NEXT_CHAIN;
176         }
177 #endif
178         uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
179         Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
180 
181         if (len > best_len) {
182             uint32_t match_start = cur_match - match_offset;
183             s->match_start = match_start;
184 
185             /* Do not look for matches beyond the end of the input. */
186             if (len > lookahead)
187                 return lookahead;
188             best_len = len;
189             if (best_len >= nice_match)
190                 return best_len;
191 
192             offset = best_len-1;
193 #ifdef UNALIGNED_OK
194             if (best_len >= sizeof(uint32_t)) {
195                 offset -= 2;
196 #ifdef UNALIGNED64_OK
197                 if (best_len >= sizeof(uint64_t))
198                     offset -= 4;
199 #endif
200             }
201 #endif
202 
203 #ifdef UNALIGNED64_OK
204             zmemcpy_8(scan_end, scan+offset);
205 #elif defined(UNALIGNED_OK)
206             zmemcpy_4(scan_end, scan+offset);
207 #else
208             scan_end[0] = *(scan+offset);
209             scan_end[1] = *(scan+offset+1);
210 #endif
211 
212 #ifdef LONGEST_MATCH_SLOW
213             /* Look for a better string offset */
214             if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
215                 Pos pos, next_pos;
216                 uint32_t i, hash;
217                 unsigned char *scan_endstr;
218 
219                 /* Go back to offset 0 */
220                 cur_match -= match_offset;
221                 match_offset = 0;
222                 next_pos = cur_match;
223                 for (i = 0; i <= len - STD_MIN_MATCH; i++) {
224                     pos = prev[(cur_match + i) & wmask];
225                     if (pos < next_pos) {
226                         /* Hash chain is more distant, use it */
227                         if (pos <= limit_base + i)
228                             goto break_matching;
229                         next_pos = pos;
230                         match_offset = (Pos)i;
231                     }
232                 }
233                 /* Switch cur_match to next_pos chain */
234                 cur_match = next_pos;
235 
236                 /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
237                  * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
238                  * us include one more byte into hash - the byte which will be checked
239                  * in main loop now, and which allows to grow match by 1.
240                  */
241                 scan_endstr = scan + len - (STD_MIN_MATCH+1);
242 
243                 hash = s->update_hash(s, 0, scan_endstr[0]);
244                 hash = s->update_hash(s, hash, scan_endstr[1]);
245                 hash = s->update_hash(s, hash, scan_endstr[2]);
246 
247                 pos = s->head[hash];
248                 if (pos < cur_match) {
249                     match_offset = (Pos)(len - (STD_MIN_MATCH+1));
250                     if (pos <= limit_base + match_offset)
251                         goto break_matching;
252                     cur_match = pos;
253                 }
254 
255                 /* Update offset-dependent variables */
256                 limit = limit_base+match_offset;
257                 mbase_start = window-match_offset;
258                 mbase_end = (mbase_start+offset);
259                 continue;
260             }
261 #endif
262             mbase_end = (mbase_start+offset);
263         }
264 #ifndef LONGEST_MATCH_SLOW
265         else if (UNLIKELY(early_exit)) {
266             /* The probability of finding a match later if we here is pretty low, so for
267              * performance it's best to outright stop here for the lower compression levels
268              */
269             break;
270         }
271 #endif
272         GOTO_NEXT_CHAIN;
273     }
274     return best_len;
275 
276 #ifdef LONGEST_MATCH_SLOW
277 break_matching:
278 
279     if (best_len < s->lookahead)
280         return best_len;
281 
282     return s->lookahead;
283 #endif
284 }
285 
286 #undef LONGEST_MATCH_SLOW
287 #undef LONGEST_MATCH
288 #undef COMPARE256
289