1 /* deflate_medium.c -- The deflate_medium deflate strategy
2  *
3  * Copyright (C) 2013 Intel Corporation. All rights reserved.
4  * Authors:
5  *  Arjan van de Ven    <[email protected]>
6  *
7  * For conditions of distribution and use, see copyright notice in zlib.h
8  */
9 #ifndef NO_MEDIUM_STRATEGY
10 #include "zbuild.h"
11 #include "deflate.h"
12 #include "deflate_p.h"
13 #include "functable.h"
14 
/* One candidate match (or literal run) tracked by deflate_medium().
 * All positions are window indices stored in 16 bits.
 */
struct match {
    uint16_t match_start;   /* window index of the earlier occurrence (copied from s->match_start) */
    uint16_t match_length;  /* length in bytes; values below WANT_MIN_MATCH are emitted as literals */
    uint16_t strstart;      /* window index where this match begins */
    uint16_t orgstart;      /* starts equal to strstart; bumped by fizzle_matches() when the match is moved left */
};
21 
/* Emit one match into the symbol buffer of s and consume it from the lookahead.
 *
 * A match of at least WANT_MIN_MATCH bytes is tallied as a single distance/length
 * pair. Anything shorter is tallied as match.match_length literal bytes read from
 * s->window starting at match.strstart. Either way s->lookahead shrinks by
 * match.match_length.
 *
 * Returns the accumulated result of the zng_tr_tally_*() calls; the caller
 * flushes the current block when it is non-zero.
 */
static int emit_match(deflate_state *s, struct match match) {
    int flush_needed = 0;

    if (match.match_length >= WANT_MIN_MATCH) {
        /* Long enough to be coded as a back-reference. */
        check_match(s, match.strstart, match.match_start, match.match_length);

        flush_needed = zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - STD_MIN_MATCH);
        s->lookahead -= match.match_length;
        return flush_needed;
    }

    /* Too short for a back-reference: emit byte by byte as literals. */
    for (; match.match_length != 0; match.match_length--, match.strstart++) {
        flush_needed += zng_tr_tally_lit(s, s->window[match.strstart]);
        s->lookahead--;
    }
    return flush_needed;
}
43 
/* Add the window positions covered by a match to the hash table.
 *
 * The string at match.strstart is treated as already being in the table, so
 * insertion starts one byte later. Positions below match.orgstart are never
 * inserted here. Nothing is done when the lookahead does not extend more than
 * WANT_MIN_MATCH bytes beyond the match.
 *
 * match is passed by value; the caller's copy is not modified.
 */
static void insert_match(deflate_state *s, struct match match) {
    if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + WANT_MIN_MATCH)))
        return;

    if (LIKELY(match.match_length < WANT_MIN_MATCH)) {
        /* Short match, emitted as literals: skip the first byte and insert
         * whatever remains, provided it does not start before orgstart.
         */
        match.match_length--;
        match.strstart++;

        if (LIKELY(match.match_length == 0) || match.strstart < match.orgstart)
            return;

        functable.insert_string(s, match.strstart,
                                (match.strstart + match.match_length - 1 >= match.orgstart)
                                    ? match.match_length
                                    : match.orgstart - match.strstart + 1);
        return;
    }

    if (match.match_length > 16 * s->max_insert_length || s->lookahead < WANT_MIN_MATCH) {
        /* Match too long to hash every position (saves time, degrades
         * compression): only hash a single string near the end of the match.
         */
        match.strstart += match.match_length;

        if (match.strstart >= (STD_MIN_MATCH - 2))
            functable.quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH);

        /* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not
         * matter since it will be recomputed at next deflate call.
         */
        return;
    }

    /* Regular case: hash every position of the match after the first one. */
    match.strstart++;
    match.match_length--; /* string at strstart already in table */

    if (LIKELY(match.strstart >= match.orgstart)) {
        functable.insert_string(s, match.strstart,
                                LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)
                                    ? match.match_length
                                    : match.orgstart - match.strstart + 1);
    } else if (match.orgstart < match.strstart + match.match_length) {
        /* The match begins before orgstart: insert only the part from orgstart on. */
        functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
    }
}
96 
/* Try to rebalance two adjacent matches in favour of the later one.
 *
 * While the byte preceding next's match source equals the byte preceding
 * next's start, next is extended one byte to the left and current is shortened
 * by one byte. The adjusted pair is written back only if current ends up at
 * most one byte long and next's length is not 2; otherwise both matches are
 * left untouched.
 *
 *   s        deflate state supplying the window and MAX_DIST(s)
 *   current  in/out: the match about to be emitted
 *   next     in/out: the match found right after current
 */
static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
    Pos limit;
    unsigned char *match, *orig;
    int changed = 0;
    struct match c, n;

    /* step zero: sanity checks */
    if (current->match_length <= 1)
        return;

    if (UNLIKELY(current->match_length > 1 + next->match_start))
        return;

    if (UNLIKELY(current->match_length > 1 + next->strstart))
        return;

    /* The checks above guarantee both offsets are non-negative. Compute them
     * as integers first so that no intermediate pointer is formed below
     * s->window (which would be undefined pointer arithmetic).
     */
    match = s->window + (next->match_start + 1 - current->match_length);
    orig  = s->window + (next->strstart + 1 - current->match_length);

    /* quick exit check.. if this fails then don't bother with anything else */
    if (LIKELY(*match != *orig))
        return;

    /* Work on copies so that nothing changes unless the result is accepted. */
    c = *current;
    n = *next;

    /* step one: try to move the "next" match to the left as much as possible */
    limit = next->strstart > MAX_DIST(s) ? next->strstart - (Pos)MAX_DIST(s) : 0;

    match = s->window + n.match_start - 1;
    orig = s->window + n.strstart - 1;

    while (*match == *orig) {
        if (UNLIKELY(c.match_length < 1))
            break;
        if (UNLIKELY(n.strstart <= limit))
            break;
        if (UNLIKELY(n.match_length >= 256))
            break;
        if (UNLIKELY(n.match_start <= 1))
            break;

        n.strstart--;
        n.match_start--;
        n.match_length++;
        c.match_length--;
        match--;
        orig--;
        changed++;
    }

    if (!changed)
        return;

    /* step two: commit only if current has shrunk to a literal (or nothing) */
    if (c.match_length <= 1 && n.match_length != 2) {
        n.orgstart++;
        *current = c;
        *next = n;
    }
}
159 
/* Compress the input currently buffered in s with the "medium" strategy.
 *
 * Every pass of the main loop handles one match (or literal) at s->strstart:
 *   1. Pick current_match: either the next_match carried over from the
 *      previous pass, or a fresh search through the hash chain at s->strstart.
 *   2. Hash the positions covered by current_match (insert_match).
 *   3. For levels >= 5 with enough lookahead, search for next_match at the
 *      position right after current_match and let fizzle_matches() move bytes
 *      from the tail of current_match to the head of next_match.
 *   4. Emit current_match (emit_match) and advance s->strstart past it.
 *
 * Returns need_more when the lookahead is below MIN_LOOKAHEAD and flush is
 * Z_NO_FLUSH, finish_done after the final block has been flushed for Z_FINISH,
 * and block_done otherwise.
 * NOTE(review): FLUSH_BLOCK is a macro defined outside this file; it may add
 * further return paths -- confirm against its definition.
 */
Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
    /* Align the first struct to start on a new cacheline, this allows us to fit both structs in one cacheline */
    ALIGNED_(16) struct match current_match;
                 struct match next_match;

    /* For levels below 5, don't check the next position for a better match */
    int early_exit = s->level < 5;

    memset(&current_match, 0, sizeof(struct match));
    memset(&next_match, 0, sizeof(struct match));

    for (;;) {
        Pos hash_head = 0;    /* head of the hash chain */
        int bflush = 0;       /* set if current block must be flushed */
        int64_t dist;

        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need STD_MAX_MATCH bytes
         * for the next match, plus WANT_MIN_MATCH bytes to insert the
         * string following the next current_match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
            if (UNLIKELY(s->lookahead == 0))
                break; /* flush the current block */
            /* Drop any match carried over from the previous pass after a refill. */
            next_match.match_length = 0;
        }

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */

        /* If we already have a future match from a previous round, just use that */
        if (!early_exit && next_match.match_length > 0) {
            current_match = next_match;
            next_match.match_length = 0;
        } else {
            hash_head = 0;
            if (s->lookahead >= WANT_MIN_MATCH) {
                hash_head = functable.quick_insert_string(s, s->strstart);
            }

            current_match.strstart = (uint16_t)s->strstart;
            current_match.orgstart = current_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < WANT_MIN_MATCH
             */

            /* Signed 64-bit difference: a hash_head at or beyond strstart gives
             * dist <= 0 and is rejected below instead of wrapping around.
             */
            dist = (int64_t)s->strstart - hash_head;
            if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                current_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
                current_match.match_start = (uint16_t)s->match_start;
                /* Anything too short to encode as a match becomes a single literal. */
                if (UNLIKELY(current_match.match_length < WANT_MIN_MATCH))
                    current_match.match_length = 1;
                if (UNLIKELY(current_match.match_start >= current_match.strstart)) {
                    /* this can happen due to some restarts */
                    current_match.match_length = 1;
                }
            } else {
                /* Set up the match to be a 1 byte literal */
                current_match.match_start = 0;
                current_match.match_length = 1;
            }
        }

        /* Hash the positions covered by the current match before looking ahead. */
        insert_match(s, current_match);

        /* now, look ahead one */
        if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
            /* Temporarily move the cursor to the byte following the current
             * match; it is restored once the look-ahead search is done.
             */
            s->strstart = current_match.strstart + current_match.match_length;
            hash_head = functable.quick_insert_string(s, s->strstart);

            next_match.strstart = (uint16_t)s->strstart;
            next_match.orgstart = next_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < WANT_MIN_MATCH
             */

            dist = (int64_t)s->strstart - hash_head;
            if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                next_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
                next_match.match_start = (uint16_t)s->match_start;
                if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
                    /* this can happen due to some restarts */
                    next_match.match_length = 1;
                }
                /* Only a real match is worth rebalancing against the current one. */
                if (next_match.match_length < WANT_MIN_MATCH)
                    next_match.match_length = 1;
                else
                    fizzle_matches(s, &current_match, &next_match);
            } else {
                /* Set up the match to be a 1 byte literal */
                next_match.match_start = 0;
                next_match.match_length = 1;
            }

            /* Restore the cursor; fizzle_matches() may change the length of
             * current_match but never its strstart.
             */
            s->strstart = current_match.strstart;
        } else {
            /* No look-ahead this round: the next pass searches from scratch. */
            next_match.match_length = 0;
        }

        /* now emit the current match */
        bflush = emit_match(s, current_match);

        /* move the "cursor" forward */
        s->strstart += current_match.match_length;

        if (UNLIKELY(bflush))
            FLUSH_BLOCK(s, 0);
    }
    /* s->insert = min(s->strstart, STD_MIN_MATCH - 1) */
    s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1);
    if (flush == Z_FINISH) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
    /* Symbols are still buffered: flush them as a non-final block. */
    if (UNLIKELY(s->sym_next))
        FLUSH_BLOCK(s, 0);

    return block_done;
}
293 #endif
294