1 /* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL compression support. */
2
3 /*
4 Use the following commands to build zlib-ng with DFLTCC compression support:
5
6 $ ./configure --with-dfltcc-deflate
7 or
8
9 $ cmake -DWITH_DFLTCC_DEFLATE=1 .
10
11 and then
12
13 $ make
14 */
15
16 #include "zbuild.h"
17 #include "deflate.h"
18 #include "trees_emit.h"
19 #include "dfltcc_deflate.h"
20 #include "dfltcc_detail.h"
21
22 struct dfltcc_deflate_state {
23 struct dfltcc_state common;
24 uint16_t level_mask; /* Levels on which to use DFLTCC */
25 uint32_t block_size; /* New block each X bytes */
26 size_t block_threshold; /* New block after total_in > X */
27 uint32_t dht_threshold; /* New block only if avail_in >= X */
28 };
29
/* View the result of GET_DFLTCC_STATE(state) as the deflate-specific
 * struct dfltcc_deflate_state.
 */
#define GET_DFLTCC_DEFLATE_STATE(state) \
    ((struct dfltcc_deflate_state *)GET_DFLTCC_STATE(state))
31
PREFIX(dfltcc_alloc_deflate_state)32 void Z_INTERNAL *PREFIX(dfltcc_alloc_deflate_state)(PREFIX3(streamp) strm) {
33 return dfltcc_alloc_state(strm, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state));
34 }
35
PREFIX(dfltcc_reset_deflate_state)36 void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) {
37 deflate_state *state = (deflate_state *)strm->state;
38 struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state);
39
40 dfltcc_reset_state(&dfltcc_state->common);
41
42 /* Initialize tuning parameters */
43 dfltcc_state->level_mask = DFLTCC_LEVEL_MASK;
44 dfltcc_state->block_size = DFLTCC_BLOCK_SIZE;
45 dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE;
46 dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE;
47 }
48
PREFIX(dfltcc_copy_deflate_state)49 void Z_INTERNAL PREFIX(dfltcc_copy_deflate_state)(void *dst, const void *src) {
50 dfltcc_copy_state(dst, src, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state));
51 }
52
/* Decide whether DFLTCC can compress strm with the given settings.
 *
 * Returns 1 only if all of the following hold, 0 otherwise:
 *  - the bit for `level` is set in the level mask,
 *  - window_bits equals HB_BITS,
 *  - strategy is Z_FIXED or Z_DEFAULT_STRATEGY,
 *  - reproducible output was not requested,
 *  - the facility bits report DFLTCC_GDHT, DFLTCC_CMPR and DFLTCC_FMT0.
 */
static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy,
                                                 int reproducible) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_deflate_state *tuning = GET_DFLTCC_DEFLATE_STATE(state);
    int settings_supported;
    int hardware_supported;

    /* Compression settings; evaluated left to right with short-circuiting */
    settings_supported = (tuning->level_mask & (1 << level)) != 0 &&
                         window_bits == HB_BITS &&
                         (strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY) &&
                         !reproducible;
    if (!settings_supported) {
        return 0;
    }

    /* Hardware capabilities */
    hardware_supported = is_bit_set(tuning->common.af.fns, DFLTCC_GDHT) &&
                         is_bit_set(tuning->common.af.fns, DFLTCC_CMPR) &&
                         is_bit_set(tuning->common.af.fmts, DFLTCC_FMT0);
    return hardware_supported ? 1 : 0;
}
76
PREFIX(dfltcc_can_deflate)77 int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) {
78 deflate_state *state = (deflate_state *)strm->state;
79
80 return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible);
81 }
82
/* Invoke DFLTCC_GDHT on the input currently available in strm. No output
 * buffer and no history window are supplied. The call works on a local copy
 * of avail_in, so strm->avail_in itself is not modified here.
 */
static inline void dfltcc_gdht(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
    size_t sample_len = strm->avail_in;

    dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &sample_len, NULL);
}
90
dfltcc_cmpr(PREFIX3 (streamp)strm)91 static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) {
92 deflate_state *state = (deflate_state *)strm->state;
93 struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
94 size_t avail_in = strm->avail_in;
95 size_t avail_out = strm->avail_out;
96 dfltcc_cc cc;
97
98 cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR,
99 param, &strm->next_out, &avail_out,
100 &strm->next_in, &avail_in, state->window);
101 strm->total_in += (strm->avail_in - avail_in);
102 strm->total_out += (strm->avail_out - avail_out);
103 strm->avail_in = avail_in;
104 strm->avail_out = avail_out;
105 return cc;
106 }
107
/* Write the End-of-Block symbol taken from the parameter block (param->eobs,
 * param->eobl bits long) through the software bit buffer and flush whatever
 * became pending as a result.
 */
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) {
    deflate_state *state = (deflate_state *)strm->state;

    send_bits(state,
              PREFIX(bi_reverse)(param->eobs >> (15 - param->eobl), param->eobl),
              param->eobl,
              state->bi_buf,
              state->bi_valid);
    PREFIX(flush_pending)(strm);

    if (state->pending != 0) {
        /* Not everything could be flushed: the leftover bytes now sit at
         * pending_out[0:pending]. A later put_byte() - deflate() may issue
         * one - stores into pending_buf[pending], which would be the wrong
         * place. Slide the leftover to the start of pending_buf so that
         * put_byte() works again.
         */
        memmove(state->pending_buf, state->pending_out, state->pending);
        state->pending_out = state->pending_buf;
    }

#ifdef ZLIB_DEBUG
    state->compressed_len += param->eobl;
#endif
}
127
PREFIX(dfltcc_deflate)128 int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) {
129 deflate_state *state = (deflate_state *)strm->state;
130 struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state);
131 struct dfltcc_param_v0 *param = &dfltcc_state->common.param;
132 uInt masked_avail_in;
133 dfltcc_cc cc;
134 int need_empty_block;
135 int soft_bcc;
136 int no_flush;
137
138 if (!PREFIX(dfltcc_can_deflate)(strm)) {
139 /* Clear history. */
140 if (flush == Z_FULL_FLUSH)
141 param->hl = 0;
142 return 0;
143 }
144
145 again:
146 masked_avail_in = 0;
147 soft_bcc = 0;
148 no_flush = flush == Z_NO_FLUSH;
149
150 /* No input data. Return, except when Continuation Flag is set, which means
151 * that DFLTCC has buffered some output in the parameter block and needs to
152 * be called again in order to flush it.
153 */
154 if (strm->avail_in == 0 && !param->cf) {
155 /* A block is still open, and the hardware does not support closing
156 * blocks without adding data. Thus, close it manually.
157 */
158 if (!no_flush && param->bcf) {
159 send_eobs(strm, param);
160 param->bcf = 0;
161 }
162 /* Let one of deflate_* functions write a trailing empty block. */
163 if (flush == Z_FINISH)
164 return 0;
165 /* Clear history. */
166 if (flush == Z_FULL_FLUSH)
167 param->hl = 0;
168 /* Trigger block post-processing if necessary. */
169 *result = no_flush ? need_more : block_done;
170 return 1;
171 }
172
173 /* There is an open non-BFINAL block, we are not going to close it just
174 * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see
175 * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new
176 * DHT in order to adapt to a possibly changed input data distribution.
177 */
178 if (param->bcf && no_flush &&
179 strm->total_in > dfltcc_state->block_threshold &&
180 strm->avail_in >= dfltcc_state->dht_threshold) {
181 if (param->cf) {
182 /* We need to flush the DFLTCC buffer before writing the
183 * End-of-block Symbol. Mask the input data and proceed as usual.
184 */
185 masked_avail_in += strm->avail_in;
186 strm->avail_in = 0;
187 no_flush = 0;
188 } else {
189 /* DFLTCC buffer is empty, so we can manually write the
190 * End-of-block Symbol right away.
191 */
192 send_eobs(strm, param);
193 param->bcf = 0;
194 dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
195 }
196 }
197
198 /* No space for compressed data. If we proceed, dfltcc_cmpr() will return
199 * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
200 * set BCF=1, which is wrong. Avoid complications and return early.
201 */
202 if (strm->avail_out == 0) {
203 *result = need_more;
204 return 1;
205 }
206
207 /* The caller gave us too much data. Pass only one block worth of
208 * uncompressed data to DFLTCC and mask the rest, so that on the next
209 * iteration we start a new block.
210 */
211 if (no_flush && strm->avail_in > dfltcc_state->block_size) {
212 masked_avail_in += (strm->avail_in - dfltcc_state->block_size);
213 strm->avail_in = dfltcc_state->block_size;
214 }
215
216 /* When we have an open non-BFINAL deflate block and caller indicates that
217 * the stream is ending, we need to close an open deflate block and open a
218 * BFINAL one.
219 */
220 need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf;
221
222 /* Translate stream to parameter block */
223 param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
224 if (!no_flush)
225 /* We need to close a block. Always do this in software - when there is
226 * no input data, the hardware will not honor BCC. */
227 soft_bcc = 1;
228 if (flush == Z_FINISH && !param->bcf)
229 /* We are about to open a BFINAL block, set Block Header Final bit
230 * until the stream ends.
231 */
232 param->bhf = 1;
233 /* DFLTCC-CMPR will write to next_out, so make sure that buffers with
234 * higher precedence are empty.
235 */
236 Assert(state->pending == 0, "There must be no pending bytes");
237 Assert(state->bi_valid < 8, "There must be less than 8 pending bits");
238 param->sbb = (unsigned int)state->bi_valid;
239 if (param->sbb > 0)
240 *strm->next_out = (unsigned char)state->bi_buf;
241 /* Honor history and check value */
242 param->nt = 0;
243 param->cv = state->wrap == 2 ? ZSWAP32(state->crc_fold.value) : strm->adler;
244
245 /* When opening a block, choose a Huffman-Table Type */
246 if (!param->bcf) {
247 if (state->strategy == Z_FIXED || (strm->total_in == 0 && dfltcc_state->block_threshold > 0))
248 param->htt = HTT_FIXED;
249 else {
250 param->htt = HTT_DYNAMIC;
251 dfltcc_gdht(strm);
252 }
253 }
254
255 /* Deflate */
256 do {
257 cc = dfltcc_cmpr(strm);
258 if (strm->avail_in < 4096 && masked_avail_in > 0)
259 /* We are about to call DFLTCC with a small input buffer, which is
260 * inefficient. Since there is masked data, there will be at least
261 * one more DFLTCC call, so skip the current one and make the next
262 * one handle more data.
263 */
264 break;
265 } while (cc == DFLTCC_CC_AGAIN);
266
267 /* Translate parameter block to stream */
268 strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc);
269 state->bi_valid = param->sbb;
270 if (state->bi_valid == 0)
271 state->bi_buf = 0; /* Avoid accessing next_out */
272 else
273 state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
274 if (state->wrap == 2)
275 state->crc_fold.value = ZSWAP32(param->cv);
276 else
277 strm->adler = param->cv;
278
279 /* Unmask the input data */
280 strm->avail_in += masked_avail_in;
281 masked_avail_in = 0;
282
283 /* If we encounter an error, it means there is a bug in DFLTCC call */
284 Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG");
285
286 /* Update Block-Continuation Flag. It will be used to check whether to call
287 * GDHT the next time.
288 */
289 if (cc == DFLTCC_CC_OK) {
290 if (soft_bcc) {
291 send_eobs(strm, param);
292 param->bcf = 0;
293 dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
294 } else
295 param->bcf = 1;
296 if (flush == Z_FINISH) {
297 if (need_empty_block)
298 /* Make the current deflate() call also close the stream */
299 return 0;
300 else {
301 bi_windup(state);
302 *result = finish_done;
303 }
304 } else {
305 if (flush == Z_FULL_FLUSH)
306 param->hl = 0; /* Clear history */
307 *result = flush == Z_NO_FLUSH ? need_more : block_done;
308 }
309 } else {
310 param->bcf = 1;
311 *result = need_more;
312 }
313 if (strm->avail_in != 0 && strm->avail_out != 0)
314 goto again; /* deflate() must use all input or all output */
315 return 1;
316 }
317
318 /*
319 Switching between hardware and software compression.
320
321 DFLTCC does not support all zlib settings, e.g. generation of non-compressed
322 blocks or alternative window sizes. When such settings are applied on the
323 fly with deflateParams, we need to convert between hardware and software
324 window formats.
325 */
/* Report whether DFLTCC state has been touched for strm. Returns 1 if any
 * input has been consumed (total_in > 0), if param->nt is 0
 * (dfltcc_deflate() clears it before compressing), or if the history is
 * non-empty (param->hl > 0); returns 0 otherwise.
 */
static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;

    if (strm->total_in > 0) {
        return 1;
    }
    if (param->nt == 0) {
        return 1;
    }
    return param->hl > 0;
}
332
PREFIX(dfltcc_deflate_params)333 int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush) {
334 deflate_state *state = (deflate_state *)strm->state;
335 int could_deflate = PREFIX(dfltcc_can_deflate)(strm);
336 int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible);
337
338 if (can_deflate == could_deflate)
339 /* We continue to work in the same mode - no changes needed */
340 return Z_OK;
341
342 if (!dfltcc_was_deflate_used(strm))
343 /* DFLTCC was not used yet - no changes needed */
344 return Z_OK;
345
346 /* For now, do not convert between window formats - simply get rid of the old data instead */
347 *flush = Z_FULL_FLUSH;
348 return Z_OK;
349 }
350
PREFIX(dfltcc_deflate_done)351 int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) {
352 deflate_state *state = (deflate_state *)strm->state;
353 struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
354 struct dfltcc_param_v0 *param = &dfltcc_state->param;
355
356 /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
357 * close the block without resetting the compression state. Detect this
358 * situation and return that deflation is not done.
359 */
360 if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
361 return 0;
362
363 /* Return that deflation is not done if DFLTCC is used and either it
364 * buffered some data (Continuation Flag is set), or has not written EOBS
365 * yet (Block-Continuation Flag is set).
366 */
367 return !PREFIX(dfltcc_can_deflate)(strm) || (!param->cf && !param->bcf);
368 }
369
PREFIX(dfltcc_can_set_reproducible)370 int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible) {
371 deflate_state *state = (deflate_state *)strm->state;
372
373 return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm);
374 }
375
376 /*
377 Preloading history.
378 */
/* Append `count` bytes from `buf` to the circular history buffer `history`,
 * whose current contents are described by param->ho (offset) and param->hl
 * (length). Only the last HB_SIZE bytes of `buf` are used. param->ho and
 * param->hl are updated to describe the new contents.
 */
static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) {
    size_t write_pos;
    size_t total;

    /* Anything beyond the last 32K can never stay in the buffer */
    if (count > HB_SIZE) {
        buf += count - HB_SIZE;
        count = HB_SIZE;
    }

    /* Copy the data in, starting right behind the existing history */
    write_pos = (param->ho + param->hl) % HB_SIZE;
    if (write_pos + count > HB_SIZE) {
        /* The data crosses the end of the buffer - two pieces */
        size_t head = HB_SIZE - write_pos;

        memcpy(history + write_pos, buf, head);
        memcpy(history, buf + head, count - head);
    } else {
        /* The data fits before the end of the buffer - one piece */
        memcpy(history + write_pos, buf, count);
    }

    /* Describe the new contents */
    total = param->hl + count;
    if (total > HB_SIZE) {
        /* Overfull: the oldest bytes were overwritten, move the start */
        param->ho = (param->ho + (total - HB_SIZE)) % HB_SIZE;
        param->hl = HB_SIZE;
    } else {
        /* Everything fits, nothing was discarded */
        param->hl = total;
    }
}
408
PREFIX(dfltcc_deflate_set_dictionary)409 int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm,
410 const unsigned char *dictionary, uInt dict_length) {
411 deflate_state *state = (deflate_state *)strm->state;
412 struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
413 struct dfltcc_param_v0 *param = &dfltcc_state->param;
414
415 append_history(param, state->window, dictionary, dict_length);
416 state->strstart = 1; /* Add FDICT to zlib header */
417 state->block_start = state->strstart; /* Make deflate_stored happy */
418 return Z_OK;
419 }
420
PREFIX(dfltcc_deflate_get_dictionary)421 int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) {
422 deflate_state *state = (deflate_state *)strm->state;
423 struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
424 struct dfltcc_param_v0 *param = &dfltcc_state->param;
425
426 if (dictionary) {
427 if (param->ho + param->hl <= HB_SIZE)
428 /* Circular history buffer does not wrap - copy one chunk */
429 memcpy(dictionary, state->window + param->ho, param->hl);
430 else {
431 /* Circular history buffer wraps - copy two chunks */
432 memcpy(dictionary, state->window + param->ho, HB_SIZE - param->ho);
433 memcpy(dictionary + HB_SIZE - param->ho, state->window, param->ho + param->hl - HB_SIZE);
434 }
435 }
436 if (dict_length)
437 *dict_length = param->hl;
438 return Z_OK;
439 }
440