1*01826a49SYabin Cui /* fitblk.c: example of fitting compressed output to a specified size
2*01826a49SYabin Cui Not copyrighted -- provided to the public domain
3*01826a49SYabin Cui Version 1.1 25 November 2004 Mark Adler */
4*01826a49SYabin Cui
5*01826a49SYabin Cui /* Version history:
6*01826a49SYabin Cui 1.0 24 Nov 2004 First version
7*01826a49SYabin Cui 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
8*01826a49SYabin Cui Use fixed-size, stack-allocated raw buffers
9*01826a49SYabin Cui Simplify code moving compression to subroutines
10*01826a49SYabin Cui Use assert() for internal errors
11*01826a49SYabin Cui Add detailed description of approach
12*01826a49SYabin Cui */
13*01826a49SYabin Cui
14*01826a49SYabin Cui /* Approach to just fitting a requested compressed size:
15*01826a49SYabin Cui
16*01826a49SYabin Cui fitblk performs three compression passes on a portion of the input
17*01826a49SYabin Cui data in order to determine how much of that input will compress to
18*01826a49SYabin Cui nearly the requested output block size. The first pass generates
19*01826a49SYabin Cui enough deflate blocks to produce output to fill the requested
20*01826a49SYabin Cui output size plus a specified excess amount (see the EXCESS define
21*01826a49SYabin Cui below). The last deflate block may go quite a bit past that, but
22*01826a49SYabin Cui is discarded. The second pass decompresses and recompresses just
23*01826a49SYabin Cui the compressed data that fit in the requested plus excess sized
24*01826a49SYabin Cui buffer. The deflate process is terminated after that amount of
25*01826a49SYabin Cui input, which is less than the amount consumed on the first pass.
26*01826a49SYabin Cui The last deflate block of the result will be of a comparable size
27*01826a49SYabin Cui to the final product, so that the header for that deflate block and
28*01826a49SYabin Cui the compression ratio for that block will be about the same as in
29*01826a49SYabin Cui the final product. The third compression pass decompresses the
30*01826a49SYabin Cui result of the second step, but only the compressed data up to the
31*01826a49SYabin Cui requested size minus an amount to allow the compressed stream to
32*01826a49SYabin Cui complete (see the MARGIN define below). That will result in a
33*01826a49SYabin Cui final compressed stream whose length is less than or equal to the
34*01826a49SYabin Cui requested size. Assuming sufficient input and a requested size
35*01826a49SYabin Cui greater than a few hundred bytes, the shortfall will typically be
36*01826a49SYabin Cui less than ten bytes.
37*01826a49SYabin Cui
   If the input is short enough that the first compression completes
   before filling the requested output size, then that compressed
   stream is returned with no recompression.
41*01826a49SYabin Cui
42*01826a49SYabin Cui EXCESS is chosen to be just greater than the shortfall seen in a
43*01826a49SYabin Cui two pass approach similar to the above. That shortfall is due to
44*01826a49SYabin Cui the last deflate block compressing more efficiently with a smaller
45*01826a49SYabin Cui header on the second pass. EXCESS is set to be large enough so
46*01826a49SYabin Cui that there is enough uncompressed data for the second pass to fill
47*01826a49SYabin Cui out the requested size, and small enough so that the final deflate
48*01826a49SYabin Cui block of the second pass will be close in size to the final deflate
49*01826a49SYabin Cui block of the third and final pass. MARGIN is chosen to be just
50*01826a49SYabin Cui large enough to assure that the final compression has enough room
51*01826a49SYabin Cui to complete in all cases.
52*01826a49SYabin Cui */
53*01826a49SYabin Cui
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include "zlib.h"
58*01826a49SYabin Cui
59*01826a49SYabin Cui #define local static
60*01826a49SYabin Cui
61*01826a49SYabin Cui /* print nastygram and leave */
quit(char * why)62*01826a49SYabin Cui local void quit(char *why)
63*01826a49SYabin Cui {
64*01826a49SYabin Cui fprintf(stderr, "fitblk abort: %s\n", why);
65*01826a49SYabin Cui exit(1);
66*01826a49SYabin Cui }
67*01826a49SYabin Cui
68*01826a49SYabin Cui #define RAWLEN 4096 /* intermediate uncompressed buffer size */
69*01826a49SYabin Cui
70*01826a49SYabin Cui /* compress from file to def until provided buffer is full or end of
71*01826a49SYabin Cui input reached; return last deflate() return value, or Z_ERRNO if
72*01826a49SYabin Cui there was read error on the file */
partcompress(FILE * in,z_streamp def)73*01826a49SYabin Cui local int partcompress(FILE *in, z_streamp def)
74*01826a49SYabin Cui {
75*01826a49SYabin Cui int ret, flush;
76*01826a49SYabin Cui unsigned char raw[RAWLEN];
77*01826a49SYabin Cui
78*01826a49SYabin Cui flush = Z_NO_FLUSH;
79*01826a49SYabin Cui do {
80*01826a49SYabin Cui def->avail_in = fread(raw, 1, RAWLEN, in);
81*01826a49SYabin Cui if (ferror(in))
82*01826a49SYabin Cui return Z_ERRNO;
83*01826a49SYabin Cui def->next_in = raw;
84*01826a49SYabin Cui if (feof(in))
85*01826a49SYabin Cui flush = Z_FINISH;
86*01826a49SYabin Cui ret = deflate(def, flush);
87*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
88*01826a49SYabin Cui } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
89*01826a49SYabin Cui return ret;
90*01826a49SYabin Cui }
91*01826a49SYabin Cui
92*01826a49SYabin Cui /* recompress from inf's input to def's output; the input for inf and
93*01826a49SYabin Cui the output for def are set in those structures before calling;
94*01826a49SYabin Cui return last deflate() return value, or Z_MEM_ERROR if inflate()
95*01826a49SYabin Cui was not able to allocate enough memory when it needed to */
recompress(z_streamp inf,z_streamp def)96*01826a49SYabin Cui local int recompress(z_streamp inf, z_streamp def)
97*01826a49SYabin Cui {
98*01826a49SYabin Cui int ret, flush;
99*01826a49SYabin Cui unsigned char raw[RAWLEN];
100*01826a49SYabin Cui
101*01826a49SYabin Cui flush = Z_NO_FLUSH;
102*01826a49SYabin Cui do {
103*01826a49SYabin Cui /* decompress */
104*01826a49SYabin Cui inf->avail_out = RAWLEN;
105*01826a49SYabin Cui inf->next_out = raw;
106*01826a49SYabin Cui ret = inflate(inf, Z_NO_FLUSH);
107*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
108*01826a49SYabin Cui ret != Z_NEED_DICT);
109*01826a49SYabin Cui if (ret == Z_MEM_ERROR)
110*01826a49SYabin Cui return ret;
111*01826a49SYabin Cui
112*01826a49SYabin Cui /* compress what was decompressed until done or no room */
113*01826a49SYabin Cui def->avail_in = RAWLEN - inf->avail_out;
114*01826a49SYabin Cui def->next_in = raw;
115*01826a49SYabin Cui if (inf->avail_out != 0)
116*01826a49SYabin Cui flush = Z_FINISH;
117*01826a49SYabin Cui ret = deflate(def, flush);
118*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
119*01826a49SYabin Cui } while (ret != Z_STREAM_END && def->avail_out != 0);
120*01826a49SYabin Cui return ret;
121*01826a49SYabin Cui }
122*01826a49SYabin Cui
123*01826a49SYabin Cui #define EXCESS 256 /* empirically determined stream overage */
124*01826a49SYabin Cui #define MARGIN 8 /* amount to back off for completion */
125*01826a49SYabin Cui
126*01826a49SYabin Cui /* compress from stdin to fixed-size block on stdout */
main(int argc,char ** argv)127*01826a49SYabin Cui int main(int argc, char **argv)
128*01826a49SYabin Cui {
129*01826a49SYabin Cui int ret; /* return code */
130*01826a49SYabin Cui unsigned size; /* requested fixed output block size */
131*01826a49SYabin Cui unsigned have; /* bytes written by deflate() call */
132*01826a49SYabin Cui unsigned char *blk; /* intermediate and final stream */
133*01826a49SYabin Cui unsigned char *tmp; /* close to desired size stream */
134*01826a49SYabin Cui z_stream def, inf; /* zlib deflate and inflate states */
135*01826a49SYabin Cui
136*01826a49SYabin Cui /* get requested output size */
137*01826a49SYabin Cui if (argc != 2)
138*01826a49SYabin Cui quit("need one argument: size of output block");
139*01826a49SYabin Cui ret = strtol(argv[1], argv + 1, 10);
140*01826a49SYabin Cui if (argv[1][0] != 0)
141*01826a49SYabin Cui quit("argument must be a number");
142*01826a49SYabin Cui if (ret < 8) /* 8 is minimum zlib stream size */
143*01826a49SYabin Cui quit("need positive size of 8 or greater");
144*01826a49SYabin Cui size = (unsigned)ret;
145*01826a49SYabin Cui
146*01826a49SYabin Cui /* allocate memory for buffers and compression engine */
147*01826a49SYabin Cui blk = malloc(size + EXCESS);
148*01826a49SYabin Cui def.zalloc = Z_NULL;
149*01826a49SYabin Cui def.zfree = Z_NULL;
150*01826a49SYabin Cui def.opaque = Z_NULL;
151*01826a49SYabin Cui ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
152*01826a49SYabin Cui if (ret != Z_OK || blk == NULL)
153*01826a49SYabin Cui quit("out of memory");
154*01826a49SYabin Cui
155*01826a49SYabin Cui /* compress from stdin until output full, or no more input */
156*01826a49SYabin Cui def.avail_out = size + EXCESS;
157*01826a49SYabin Cui def.next_out = blk;
158*01826a49SYabin Cui ret = partcompress(stdin, &def);
159*01826a49SYabin Cui if (ret == Z_ERRNO)
160*01826a49SYabin Cui quit("error reading input");
161*01826a49SYabin Cui
162*01826a49SYabin Cui /* if it all fit, then size was undersubscribed -- done! */
163*01826a49SYabin Cui if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
164*01826a49SYabin Cui /* write block to stdout */
165*01826a49SYabin Cui have = size + EXCESS - def.avail_out;
166*01826a49SYabin Cui if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
167*01826a49SYabin Cui quit("error writing output");
168*01826a49SYabin Cui
169*01826a49SYabin Cui /* clean up and print results to stderr */
170*01826a49SYabin Cui ret = deflateEnd(&def);
171*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
172*01826a49SYabin Cui free(blk);
173*01826a49SYabin Cui fprintf(stderr,
174*01826a49SYabin Cui "%u bytes unused out of %u requested (all input)\n",
175*01826a49SYabin Cui size - have, size);
176*01826a49SYabin Cui return 0;
177*01826a49SYabin Cui }
178*01826a49SYabin Cui
179*01826a49SYabin Cui /* it didn't all fit -- set up for recompression */
180*01826a49SYabin Cui inf.zalloc = Z_NULL;
181*01826a49SYabin Cui inf.zfree = Z_NULL;
182*01826a49SYabin Cui inf.opaque = Z_NULL;
183*01826a49SYabin Cui inf.avail_in = 0;
184*01826a49SYabin Cui inf.next_in = Z_NULL;
185*01826a49SYabin Cui ret = inflateInit(&inf);
186*01826a49SYabin Cui tmp = malloc(size + EXCESS);
187*01826a49SYabin Cui if (ret != Z_OK || tmp == NULL)
188*01826a49SYabin Cui quit("out of memory");
189*01826a49SYabin Cui ret = deflateReset(&def);
190*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
191*01826a49SYabin Cui
192*01826a49SYabin Cui /* do first recompression close to the right amount */
193*01826a49SYabin Cui inf.avail_in = size + EXCESS;
194*01826a49SYabin Cui inf.next_in = blk;
195*01826a49SYabin Cui def.avail_out = size + EXCESS;
196*01826a49SYabin Cui def.next_out = tmp;
197*01826a49SYabin Cui ret = recompress(&inf, &def);
198*01826a49SYabin Cui if (ret == Z_MEM_ERROR)
199*01826a49SYabin Cui quit("out of memory");
200*01826a49SYabin Cui
201*01826a49SYabin Cui /* set up for next recompression */
202*01826a49SYabin Cui ret = inflateReset(&inf);
203*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
204*01826a49SYabin Cui ret = deflateReset(&def);
205*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
206*01826a49SYabin Cui
207*01826a49SYabin Cui /* do second and final recompression (third compression) */
208*01826a49SYabin Cui inf.avail_in = size - MARGIN; /* assure stream will complete */
209*01826a49SYabin Cui inf.next_in = tmp;
210*01826a49SYabin Cui def.avail_out = size;
211*01826a49SYabin Cui def.next_out = blk;
212*01826a49SYabin Cui ret = recompress(&inf, &def);
213*01826a49SYabin Cui if (ret == Z_MEM_ERROR)
214*01826a49SYabin Cui quit("out of memory");
215*01826a49SYabin Cui assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
216*01826a49SYabin Cui
217*01826a49SYabin Cui /* done -- write block to stdout */
218*01826a49SYabin Cui have = size - def.avail_out;
219*01826a49SYabin Cui if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
220*01826a49SYabin Cui quit("error writing output");
221*01826a49SYabin Cui
222*01826a49SYabin Cui /* clean up and print results to stderr */
223*01826a49SYabin Cui free(tmp);
224*01826a49SYabin Cui ret = inflateEnd(&inf);
225*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
226*01826a49SYabin Cui ret = deflateEnd(&def);
227*01826a49SYabin Cui assert(ret != Z_STREAM_ERROR);
228*01826a49SYabin Cui free(blk);
229*01826a49SYabin Cui fprintf(stderr,
230*01826a49SYabin Cui "%u bytes unused out of %u requested (%lu input)\n",
231*01826a49SYabin Cui size - have, size, def.total_in);
232*01826a49SYabin Cui return 0;
233*01826a49SYabin Cui }
234