1 /* deflate_medium.c -- The deflate_medium deflate strategy
2 *
3 * Copyright (C) 2013 Intel Corporation. All rights reserved.
4 * Authors:
5 * Arjan van de Ven <[email protected]>
6 *
7 * For conditions of distribution and use, see copyright notice in zlib.h
8 */
9 #ifndef NO_MEDIUM_STRATEGY
10 #include "zbuild.h"
11 #include "deflate.h"
12 #include "deflate_p.h"
13 #include "functable.h"
14
/* Description of one candidate emission (a back-reference or a literal)
 * as tracked by the deflate_medium strategy. All fields are 16-bit window
 * indices/lengths.
 */
struct match {
    uint16_t match_start;   /* window index of the earlier occurrence (copied from s->match_start) */
    uint16_t match_length;  /* length of the match; 1 means "emit a single literal" */
    uint16_t strstart;      /* window index at which this match begins */
    uint16_t orgstart;      /* initially equal to strstart; used by insert_match() as the
                             * lowest position worth hashing, bumped by fizzle_matches() */
};
21
/* Emit `match` into the symbol buffer and consume it from the lookahead.
 *
 * A match of at least WANT_MIN_MATCH bytes is tallied as a single
 * distance/length pair; anything shorter is tallied byte by byte as
 * literals taken from the window.
 *
 * Returns the accumulated result of the tally calls; the caller treats a
 * non-zero value as "the current block must be flushed".
 */
static int emit_match(deflate_state *s, struct match match) {
    int flush_needed;

    if (match.match_length >= WANT_MIN_MATCH) {
        /* Real match: one distance/length symbol covers the whole run. */
        check_match(s, match.strstart, match.match_start, match.match_length);

        flush_needed = zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - STD_MIN_MATCH);

        s->lookahead -= match.match_length;
        return flush_needed;
    }

    /* Too short to be worth a back-reference: emit every byte as a literal. */
    flush_needed = 0;
    for (; match.match_length != 0; match.match_length--) {
        flush_needed += zng_tr_tally_lit(s, s->window[match.strstart]);
        s->lookahead--;
        match.strstart++;
    }
    return flush_needed;
}
43
/* Add the positions covered by `match` to the hash table.
 *
 * `match` is received by value, so nothing is written back to the caller;
 * only the hash table (through functable) is updated. The string at
 * match.strstart itself is assumed to be in the table already, so hashing
 * starts one position later, and never below match.orgstart.
 */
static void insert_match(deflate_state *s, struct match match) {
    uint16_t start;  /* first position that still needs hashing */
    uint16_t count;  /* number of positions left after skipping strstart */

    /* Not enough lookahead left behind the match to hash safely. */
    if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + WANT_MIN_MATCH)))
        return;

    /* Short matches are emitted as literals; hash what follows strstart. */
    if (LIKELY(match.match_length < WANT_MIN_MATCH)) {
        start = (uint16_t)(match.strstart + 1);
        count = (uint16_t)(match.match_length - 1);

        if (UNLIKELY(count > 0) && start >= match.orgstart) {
            if (start + count - 1 >= match.orgstart)
                functable.insert_string(s, start, count);
            else
                functable.insert_string(s, start, match.orgstart - start + 1);
        }
        return;
    }

    /* Very long matches are not hashed position by position: this saves
     * time but degrades compression. Only one position near the end of the
     * match is inserted.
     */
    if (match.match_length > 16 * s->max_insert_length || s->lookahead < WANT_MIN_MATCH) {
        uint16_t end = (uint16_t)(match.strstart + match.match_length);

        if (end >= (STD_MIN_MATCH - 2))
            functable.quick_insert_string(s, end + 2 - STD_MIN_MATCH);

        /* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not
         * matter since it will be recomputed at next deflate call.
         */
        return;
    }

    /* Regular match: hash every covered position except strstart. */
    start = (uint16_t)(match.strstart + 1);
    count = (uint16_t)(match.match_length - 1);

    if (LIKELY(start >= match.orgstart)) {
        if (LIKELY(start + count - 1 >= match.orgstart))
            functable.insert_string(s, start, count);
        else
            functable.insert_string(s, start, match.orgstart - start + 1);
    } else if (match.orgstart < start + count) {
        /* The match begins before orgstart: hash only the tail from orgstart on. */
        functable.insert_string(s, match.orgstart, start + count - match.orgstart);
    }
}
96
/* Try to grow `next` backwards at the expense of `current`.
 *
 * While the byte just before the next match equals the byte just before
 * its source, the next match is moved one position to the left (start and
 * source decrease, length increases) and the current match is shortened by
 * one. The result is committed to *current and *next only if the current
 * match shrinks to at most one byte and the next match's length is not 2;
 * otherwise both are left untouched.
 */
static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
    struct match cur_copy, next_copy;
    unsigned char *src_prev, *dst_prev;
    Pos lower_bound;
    int shifted = 0;

    /* Step zero: sanity checks. Nothing to trade away, or the probe below
     * would reach in front of the window.
     */
    if (current->match_length <= 1)
        return;
    if (UNLIKELY(current->match_length > 1 + next->match_start) ||
        UNLIKELY(current->match_length > 1 + next->strstart))
        return;

    /* Quick exit check: compare the bytes at the furthest position the next
     * match could be moved to. If they differ, don't bother with anything else.
     */
    src_prev = s->window - current->match_length + 1 + next->match_start;
    dst_prev = s->window - current->match_length + 1 + next->strstart;
    if (LIKELY(*src_prev != *dst_prev))
        return;

    /* Work on copies so a rejected attempt leaves the caller's data intact. */
    cur_copy = *current;
    next_copy = *next;

    /* Step one: try to move the "next" match to the left as much as possible. */
    if (next->strstart > MAX_DIST(s))
        lower_bound = next->strstart - (Pos)MAX_DIST(s);
    else
        lower_bound = 0;

    src_prev = s->window + next_copy.match_start - 1;
    dst_prev = s->window + next_copy.strstart - 1;

    while (*src_prev == *dst_prev &&
           cur_copy.match_length >= 1 &&
           next_copy.strstart > lower_bound &&
           next_copy.match_length < 256 &&
           next_copy.match_start > 1) {
        next_copy.strstart--;
        next_copy.match_start--;
        next_copy.match_length++;
        cur_copy.match_length--;
        src_prev--;
        dst_prev--;
        shifted++;
    }

    if (shifted == 0)
        return;

    /* Reject the trade unless the current match collapsed to at most one
     * byte and the next match's length is not 2.
     */
    if (cur_copy.match_length > 1 || next_copy.match_length == 2)
        return;

    next_copy.orgstart++;
    *current = cur_copy;
    *next = next_copy;
}
159
deflate_medium(deflate_state * s,int flush)160 Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
161 /* Align the first struct to start on a new cacheline, this allows us to fit both structs in one cacheline */
162 ALIGNED_(16) struct match current_match;
163 struct match next_match;
164
165 /* For levels below 5, don't check the next position for a better match */
166 int early_exit = s->level < 5;
167
168 memset(¤t_match, 0, sizeof(struct match));
169 memset(&next_match, 0, sizeof(struct match));
170
171 for (;;) {
172 Pos hash_head = 0; /* head of the hash chain */
173 int bflush = 0; /* set if current block must be flushed */
174 int64_t dist;
175
176 /* Make sure that we always have enough lookahead, except
177 * at the end of the input file. We need STD_MAX_MATCH bytes
178 * for the next match, plus WANT_MIN_MATCH bytes to insert the
179 * string following the next current_match.
180 */
181 if (s->lookahead < MIN_LOOKAHEAD) {
182 fill_window(s);
183 if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
184 return need_more;
185 }
186 if (UNLIKELY(s->lookahead == 0))
187 break; /* flush the current block */
188 next_match.match_length = 0;
189 }
190
191 /* Insert the string window[strstart .. strstart+2] in the
192 * dictionary, and set hash_head to the head of the hash chain:
193 */
194
195 /* If we already have a future match from a previous round, just use that */
196 if (!early_exit && next_match.match_length > 0) {
197 current_match = next_match;
198 next_match.match_length = 0;
199 } else {
200 hash_head = 0;
201 if (s->lookahead >= WANT_MIN_MATCH) {
202 hash_head = functable.quick_insert_string(s, s->strstart);
203 }
204
205 current_match.strstart = (uint16_t)s->strstart;
206 current_match.orgstart = current_match.strstart;
207
208 /* Find the longest match, discarding those <= prev_length.
209 * At this point we have always match_length < WANT_MIN_MATCH
210 */
211
212 dist = (int64_t)s->strstart - hash_head;
213 if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) {
214 /* To simplify the code, we prevent matches with the string
215 * of window index 0 (in particular we have to avoid a match
216 * of the string with itself at the start of the input file).
217 */
218 current_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
219 current_match.match_start = (uint16_t)s->match_start;
220 if (UNLIKELY(current_match.match_length < WANT_MIN_MATCH))
221 current_match.match_length = 1;
222 if (UNLIKELY(current_match.match_start >= current_match.strstart)) {
223 /* this can happen due to some restarts */
224 current_match.match_length = 1;
225 }
226 } else {
227 /* Set up the match to be a 1 byte literal */
228 current_match.match_start = 0;
229 current_match.match_length = 1;
230 }
231 }
232
233 insert_match(s, current_match);
234
235 /* now, look ahead one */
236 if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
237 s->strstart = current_match.strstart + current_match.match_length;
238 hash_head = functable.quick_insert_string(s, s->strstart);
239
240 next_match.strstart = (uint16_t)s->strstart;
241 next_match.orgstart = next_match.strstart;
242
243 /* Find the longest match, discarding those <= prev_length.
244 * At this point we have always match_length < WANT_MIN_MATCH
245 */
246
247 dist = (int64_t)s->strstart - hash_head;
248 if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) {
249 /* To simplify the code, we prevent matches with the string
250 * of window index 0 (in particular we have to avoid a match
251 * of the string with itself at the start of the input file).
252 */
253 next_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
254 next_match.match_start = (uint16_t)s->match_start;
255 if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
256 /* this can happen due to some restarts */
257 next_match.match_length = 1;
258 }
259 if (next_match.match_length < WANT_MIN_MATCH)
260 next_match.match_length = 1;
261 else
262 fizzle_matches(s, ¤t_match, &next_match);
263 } else {
264 /* Set up the match to be a 1 byte literal */
265 next_match.match_start = 0;
266 next_match.match_length = 1;
267 }
268
269 s->strstart = current_match.strstart;
270 } else {
271 next_match.match_length = 0;
272 }
273
274 /* now emit the current match */
275 bflush = emit_match(s, current_match);
276
277 /* move the "cursor" forward */
278 s->strstart += current_match.match_length;
279
280 if (UNLIKELY(bflush))
281 FLUSH_BLOCK(s, 0);
282 }
283 s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1);
284 if (flush == Z_FINISH) {
285 FLUSH_BLOCK(s, 1);
286 return finish_done;
287 }
288 if (UNLIKELY(s->sym_next))
289 FLUSH_BLOCK(s, 0);
290
291 return block_done;
292 }
293 #endif
294