xref: /aosp_15_r20/external/brotli/c/tools/brotli.c (revision f4ee7fba7774faf2a30f13154332c0a06550dbc4)
1 /* Copyright 2014 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Command line interface for Brotli library. */
8 
9 /* Mute strerror/strcpy warnings. */
10 #if !defined(_CRT_SECURE_NO_WARNINGS)
11 #define _CRT_SECURE_NO_WARNINGS
12 #endif
13 
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <time.h>
22 
23 #include "../common/constants.h"
24 #include "../common/version.h"
25 #include <brotli/decode.h>
26 #include <brotli/encode.h>
27 
28 #if !defined(_WIN32)
29 #include <unistd.h>
30 #include <utime.h>
31 #define MAKE_BINARY(FILENO) (FILENO)
32 #else
33 #include <io.h>
34 #include <share.h>
35 #include <sys/utime.h>
36 
37 #define MAKE_BINARY(FILENO) (_setmode((FILENO), _O_BINARY), (FILENO))
38 
39 #if !defined(__MINGW32__)
40 #define STDIN_FILENO _fileno(stdin)
41 #define STDOUT_FILENO _fileno(stdout)
42 #define S_IRUSR S_IREAD
43 #define S_IWUSR S_IWRITE
44 #endif
45 
46 #define fdopen _fdopen
47 #define isatty _isatty
48 #define unlink _unlink
49 #define utimbuf _utimbuf
50 #define utime _utime
51 
52 #define fopen ms_fopen
53 #define open ms_open
54 
55 #define chmod(F, P) (0)
56 #define chown(F, O, G) (0)
57 
58 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
59 #define fseek _fseeki64
60 #define ftell _ftelli64
61 #endif
62 
/* Windows stand-in for fopen() implemented on top of fopen_s().
   Returns the opened stream; the result stays NULL when fopen_s() does not
   store a stream. */
static FILE* ms_fopen(const char* filename, const char* mode) {
  FILE* stream = NULL;
  (void)fopen_s(&stream, filename, mode);
  return stream;
}
68 
ms_open(const char * filename,int oflag,int pmode)69 static int ms_open(const char* filename, int oflag, int pmode) {
70   int result = -1;
71   _sopen_s(&result, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
72   return result;
73 }
74 #endif  /* WIN32 */
75 
/* Action selected from the program name and the command line. */
typedef enum {
  COMMAND_COMPRESS = 0,        /* default, unless invoked as "unbrotli" */
  COMMAND_DECOMPRESS = 1,      /* -d / --decompress, or "unbrotli" alias */
  COMMAND_HELP = 2,            /* -h / --help */
  COMMAND_INVALID = 3,         /* command line could not be parsed */
  COMMAND_TEST_INTEGRITY = 4,  /* -t / --test */
  COMMAND_NOOP = 5,
  COMMAND_VERSION = 6          /* -V / --version */
} Command;
85 
86 #define DEFAULT_LGWIN 24
87 #define DEFAULT_SUFFIX ".br"
88 #define MAX_OPTIONS 20
89 
90 typedef struct {
91   /* Parameters */
92   int quality;
93   int lgwin;
94   int verbosity;
95   BROTLI_BOOL force_overwrite;
96   BROTLI_BOOL junk_source;
97   BROTLI_BOOL copy_stat;
98   BROTLI_BOOL write_to_stdout;
99   BROTLI_BOOL test_integrity;
100   BROTLI_BOOL decompress;
101   BROTLI_BOOL large_window;
102   const char* output_path;
103   const char* suffix;
104   int not_input_indices[MAX_OPTIONS];
105   size_t longest_path_len;
106   size_t input_count;
107 
108   /* Inner state */
109   int argc;
110   char** argv;
111   char* modified_path;  /* Storage for path with appended / cut suffix */
112   int iterator;
113   int ignore;
114   BROTLI_BOOL iterator_error;
115   uint8_t* buffer;
116   uint8_t* input;
117   uint8_t* output;
118   const char* current_input_path;
119   const char* current_output_path;
120   int64_t input_file_length;  /* -1, if impossible to calculate */
121   FILE* fin;
122   FILE* fout;
123 
124   /* I/O buffers */
125   size_t available_in;
126   const uint8_t* next_in;
127   size_t available_out;
128   uint8_t* next_out;
129 
130   /* Reporting */
131   /* size_t would be large enough,
132      until 4GiB+ files are compressed / decompressed on 32-bit CPUs. */
133   size_t total_in;
134   size_t total_out;
135 } Context;
136 
137 /* Parse up to 5 decimal digits. */
ParseInt(const char * s,int low,int high,int * result)138 static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
139   int value = 0;
140   int i;
141   for (i = 0; i < 5; ++i) {
142     char c = s[i];
143     if (c == 0) break;
144     if (s[i] < '0' || s[i] > '9') return BROTLI_FALSE;
145     value = (10 * value) + (c - '0');
146   }
147   if (i == 0) return BROTLI_FALSE;
148   if (i > 1 && s[0] == '0') return BROTLI_FALSE;
149   if (s[i] != 0) return BROTLI_FALSE;
150   if (value < low || value > high) return BROTLI_FALSE;
151   *result = value;
152   return BROTLI_TRUE;
153 }
154 
/* Returns the tail of |path| that follows its last '/' or '\' separator.
   When |path| has no separator, |path| itself is returned. The result points
   into the original string. */
static const char* FileName(const char* path) {
  const char* base = path;
  const char* cursor;
  for (cursor = path; *cursor != 0; ++cursor) {
    if (*cursor == '/' || *cursor == '\\') base = cursor + 1;
  }
  return base;
}
163 
164 /* Detect if the program name is a special alias that infers a command type. */
ParseAlias(const char * name)165 static Command ParseAlias(const char* name) {
166   /* TODO: cast name to lower case? */
167   const char* unbrotli = "unbrotli";
168   size_t unbrotli_len = strlen(unbrotli);
169   name = FileName(name);
170   /* Partial comparison. On Windows there could be ".exe" suffix. */
171   if (strncmp(name, unbrotli, unbrotli_len) == 0) {
172     char terminator = name[unbrotli_len];
173     if (terminator == 0 || terminator == '.') return COMMAND_DECOMPRESS;
174   }
175   return COMMAND_COMPRESS;
176 }
177 
ParseParams(Context * params)178 static Command ParseParams(Context* params) {
179   int argc = params->argc;
180   char** argv = params->argv;
181   int i;
182   int next_option_index = 0;
183   size_t input_count = 0;
184   size_t longest_path_len = 1;
185   BROTLI_BOOL command_set = BROTLI_FALSE;
186   BROTLI_BOOL quality_set = BROTLI_FALSE;
187   BROTLI_BOOL output_set = BROTLI_FALSE;
188   BROTLI_BOOL keep_set = BROTLI_FALSE;
189   BROTLI_BOOL lgwin_set = BROTLI_FALSE;
190   BROTLI_BOOL suffix_set = BROTLI_FALSE;
191   BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
192   Command command = ParseAlias(argv[0]);
193 
194   for (i = 1; i < argc; ++i) {
195     const char* arg = argv[i];
196     /* C99 5.1.2.2.1: "members argv[0] through argv[argc-1] inclusive shall
197        contain pointers to strings"; NULL and 0-length are not forbidden. */
198     size_t arg_len = arg ? strlen(arg) : 0;
199 
200     if (arg_len == 0) {
201       params->not_input_indices[next_option_index++] = i;
202       continue;
203     }
204 
205     /* Too many options. The expected longest option list is:
206        "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
207        This check is an additional guard that is never triggered, but provides
208        a guard for future changes. */
209     if (next_option_index > (MAX_OPTIONS - 2)) {
210       fprintf(stderr, "too many options passed\n");
211       return COMMAND_INVALID;
212     }
213 
214     /* Input file entry. */
215     if (after_dash_dash || arg[0] != '-' || arg_len == 1) {
216       input_count++;
217       if (longest_path_len < arg_len) longest_path_len = arg_len;
218       continue;
219     }
220 
221     /* Not a file entry. */
222     params->not_input_indices[next_option_index++] = i;
223 
224     /* '--' entry stop parsing arguments. */
225     if (arg_len == 2 && arg[1] == '-') {
226       after_dash_dash = BROTLI_TRUE;
227       continue;
228     }
229 
230     /* Simple / coalesced options. */
231     if (arg[1] != '-') {
232       size_t j;
233       for (j = 1; j < arg_len; ++j) {
234         char c = arg[j];
235         if (c >= '0' && c <= '9') {
236           if (quality_set) {
237             fprintf(stderr, "quality already set\n");
238             return COMMAND_INVALID;
239           }
240           quality_set = BROTLI_TRUE;
241           params->quality = c - '0';
242           continue;
243         } else if (c == 'c') {
244           if (output_set) {
245             fprintf(stderr, "write to standard output already set\n");
246             return COMMAND_INVALID;
247           }
248           output_set = BROTLI_TRUE;
249           params->write_to_stdout = BROTLI_TRUE;
250           continue;
251         } else if (c == 'd') {
252           if (command_set) {
253             fprintf(stderr, "command already set when parsing -d\n");
254             return COMMAND_INVALID;
255           }
256           command_set = BROTLI_TRUE;
257           command = COMMAND_DECOMPRESS;
258           continue;
259         } else if (c == 'f') {
260           if (params->force_overwrite) {
261             fprintf(stderr, "force output overwrite already set\n");
262             return COMMAND_INVALID;
263           }
264           params->force_overwrite = BROTLI_TRUE;
265           continue;
266         } else if (c == 'h') {
267           /* Don't parse further. */
268           return COMMAND_HELP;
269         } else if (c == 'j' || c == 'k') {
270           if (keep_set) {
271             fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
272             return COMMAND_INVALID;
273           }
274           keep_set = BROTLI_TRUE;
275           params->junk_source = TO_BROTLI_BOOL(c == 'j');
276           continue;
277         } else if (c == 'n') {
278           if (!params->copy_stat) {
279             fprintf(stderr, "argument --no-copy-stat / -n already set\n");
280             return COMMAND_INVALID;
281           }
282           params->copy_stat = BROTLI_FALSE;
283           continue;
284         } else if (c == 't') {
285           if (command_set) {
286             fprintf(stderr, "command already set when parsing -t\n");
287             return COMMAND_INVALID;
288           }
289           command_set = BROTLI_TRUE;
290           command = COMMAND_TEST_INTEGRITY;
291           continue;
292         } else if (c == 'v') {
293           if (params->verbosity > 0) {
294             fprintf(stderr, "argument --verbose / -v already set\n");
295             return COMMAND_INVALID;
296           }
297           params->verbosity = 1;
298           continue;
299         } else if (c == 'V') {
300           /* Don't parse further. */
301           return COMMAND_VERSION;
302         } else if (c == 'Z') {
303           if (quality_set) {
304             fprintf(stderr, "quality already set\n");
305             return COMMAND_INVALID;
306           }
307           quality_set = BROTLI_TRUE;
308           params->quality = 11;
309           continue;
310         }
311         /* o/q/w/D/S with parameter is expected */
312         if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
313           fprintf(stderr, "invalid argument -%c\n", c);
314           return COMMAND_INVALID;
315         }
316         if (j + 1 != arg_len) {
317           fprintf(stderr, "expected parameter for argument -%c\n", c);
318           return COMMAND_INVALID;
319         }
320         i++;
321         if (i == argc || !argv[i] || argv[i][0] == 0) {
322           fprintf(stderr, "expected parameter for argument -%c\n", c);
323           return COMMAND_INVALID;
324         }
325         params->not_input_indices[next_option_index++] = i;
326         if (c == 'o') {
327           if (output_set) {
328             fprintf(stderr, "write to standard output already set (-o)\n");
329             return COMMAND_INVALID;
330           }
331           params->output_path = argv[i];
332         } else if (c == 'q') {
333           if (quality_set) {
334             fprintf(stderr, "quality already set\n");
335             return COMMAND_INVALID;
336           }
337           quality_set = ParseInt(argv[i], BROTLI_MIN_QUALITY,
338                                  BROTLI_MAX_QUALITY, &params->quality);
339           if (!quality_set) {
340             fprintf(stderr, "error parsing quality value [%s]\n", argv[i]);
341             return COMMAND_INVALID;
342           }
343         } else if (c == 'w') {
344           if (lgwin_set) {
345             fprintf(stderr, "lgwin parameter already set\n");
346             return COMMAND_INVALID;
347           }
348           lgwin_set = ParseInt(argv[i], 0,
349                                BROTLI_MAX_WINDOW_BITS, &params->lgwin);
350           if (!lgwin_set) {
351             fprintf(stderr, "error parsing lgwin value [%s]\n", argv[i]);
352             return COMMAND_INVALID;
353           }
354           if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
355             fprintf(stderr,
356                     "lgwin parameter (%d) smaller than the minimum (%d)\n",
357                     params->lgwin, BROTLI_MIN_WINDOW_BITS);
358             return COMMAND_INVALID;
359           }
360         } else if (c == 'S') {
361           if (suffix_set) {
362             fprintf(stderr, "suffix already set\n");
363             return COMMAND_INVALID;
364           }
365           suffix_set = BROTLI_TRUE;
366           params->suffix = argv[i];
367         }
368       }
369     } else {  /* Double-dash. */
370       arg = &arg[2];
371       if (strcmp("best", arg) == 0) {
372         if (quality_set) {
373           fprintf(stderr, "quality already set\n");
374           return COMMAND_INVALID;
375         }
376         quality_set = BROTLI_TRUE;
377         params->quality = 11;
378       } else if (strcmp("decompress", arg) == 0) {
379         if (command_set) {
380           fprintf(stderr, "command already set when parsing --decompress\n");
381           return COMMAND_INVALID;
382         }
383         command_set = BROTLI_TRUE;
384         command = COMMAND_DECOMPRESS;
385       } else if (strcmp("force", arg) == 0) {
386         if (params->force_overwrite) {
387           fprintf(stderr, "force output overwrite already set\n");
388           return COMMAND_INVALID;
389         }
390         params->force_overwrite = BROTLI_TRUE;
391       } else if (strcmp("help", arg) == 0) {
392         /* Don't parse further. */
393         return COMMAND_HELP;
394       } else if (strcmp("keep", arg) == 0) {
395         if (keep_set) {
396           fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
397           return COMMAND_INVALID;
398         }
399         keep_set = BROTLI_TRUE;
400         params->junk_source = BROTLI_FALSE;
401       } else if (strcmp("no-copy-stat", arg) == 0) {
402         if (!params->copy_stat) {
403           fprintf(stderr, "argument --no-copy-stat / -n already set\n");
404           return COMMAND_INVALID;
405         }
406         params->copy_stat = BROTLI_FALSE;
407       } else if (strcmp("rm", arg) == 0) {
408         if (keep_set) {
409           fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
410           return COMMAND_INVALID;
411         }
412         keep_set = BROTLI_TRUE;
413         params->junk_source = BROTLI_TRUE;
414       } else if (strcmp("stdout", arg) == 0) {
415         if (output_set) {
416           fprintf(stderr, "write to standard output already set\n");
417           return COMMAND_INVALID;
418         }
419         output_set = BROTLI_TRUE;
420         params->write_to_stdout = BROTLI_TRUE;
421       } else if (strcmp("test", arg) == 0) {
422         if (command_set) {
423           fprintf(stderr, "command already set when parsing --test\n");
424           return COMMAND_INVALID;
425         }
426         command_set = BROTLI_TRUE;
427         command = COMMAND_TEST_INTEGRITY;
428       } else if (strcmp("verbose", arg) == 0) {
429         if (params->verbosity > 0) {
430           fprintf(stderr, "argument --verbose / -v already set\n");
431           return COMMAND_INVALID;
432         }
433         params->verbosity = 1;
434       } else if (strcmp("version", arg) == 0) {
435         /* Don't parse further. */
436         return COMMAND_VERSION;
437       } else {
438         /* key=value */
439         const char* value = strrchr(arg, '=');
440         size_t key_len;
441         if (!value || value[1] == 0) {
442           fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
443           return COMMAND_INVALID;
444         }
445         key_len = (size_t)(value - arg);
446         value++;
447         if (strncmp("lgwin", arg, key_len) == 0) {
448           if (lgwin_set) {
449             fprintf(stderr, "lgwin parameter already set\n");
450             return COMMAND_INVALID;
451           }
452           lgwin_set = ParseInt(value, 0,
453                                BROTLI_MAX_WINDOW_BITS, &params->lgwin);
454           if (!lgwin_set) {
455             fprintf(stderr, "error parsing lgwin value [%s]\n", value);
456             return COMMAND_INVALID;
457           }
458           if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
459             fprintf(stderr,
460                     "lgwin parameter (%d) smaller than the minimum (%d)\n",
461                     params->lgwin, BROTLI_MIN_WINDOW_BITS);
462             return COMMAND_INVALID;
463           }
464         } else if (strncmp("large_window", arg, key_len) == 0) {
465           /* This option is intentionally not mentioned in help. */
466           if (lgwin_set) {
467             fprintf(stderr, "lgwin parameter already set\n");
468             return COMMAND_INVALID;
469           }
470           lgwin_set = ParseInt(value, 0,
471                                BROTLI_LARGE_MAX_WINDOW_BITS, &params->lgwin);
472           if (!lgwin_set) {
473             fprintf(stderr, "error parsing lgwin value [%s]\n", value);
474             return COMMAND_INVALID;
475           }
476           if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
477             fprintf(stderr,
478                     "lgwin parameter (%d) smaller than the minimum (%d)\n",
479                     params->lgwin, BROTLI_MIN_WINDOW_BITS);
480             return COMMAND_INVALID;
481           }
482         } else if (strncmp("output", arg, key_len) == 0) {
483           if (output_set) {
484             fprintf(stderr,
485                     "write to standard output already set (--output)\n");
486             return COMMAND_INVALID;
487           }
488           params->output_path = value;
489         } else if (strncmp("quality", arg, key_len) == 0) {
490           if (quality_set) {
491             fprintf(stderr, "quality already set\n");
492             return COMMAND_INVALID;
493           }
494           quality_set = ParseInt(value, BROTLI_MIN_QUALITY,
495                                  BROTLI_MAX_QUALITY, &params->quality);
496           if (!quality_set) {
497             fprintf(stderr, "error parsing quality value [%s]\n", value);
498             return COMMAND_INVALID;
499           }
500         } else if (strncmp("suffix", arg, key_len) == 0) {
501           if (suffix_set) {
502             fprintf(stderr, "suffix already set\n");
503             return COMMAND_INVALID;
504           }
505           suffix_set = BROTLI_TRUE;
506           params->suffix = value;
507         } else {
508           fprintf(stderr, "invalid parameter: [%s]\n", arg);
509           return COMMAND_INVALID;
510         }
511       }
512     }
513   }
514 
515   params->input_count = input_count;
516   params->longest_path_len = longest_path_len;
517   params->decompress = (command == COMMAND_DECOMPRESS);
518   params->test_integrity = (command == COMMAND_TEST_INTEGRITY);
519 
520   if (input_count > 1 && output_set) return COMMAND_INVALID;
521   if (params->test_integrity) {
522     if (params->output_path) return COMMAND_INVALID;
523     if (params->write_to_stdout) return COMMAND_INVALID;
524   }
525   if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) {
526     return COMMAND_INVALID;
527   }
528 
529   return command;
530 }
531 
PrintVersion(void)532 static void PrintVersion(void) {
533   int major = BROTLI_VERSION >> 24;
534   int minor = (BROTLI_VERSION >> 12) & 0xFFF;
535   int patch = BROTLI_VERSION & 0xFFF;
536   fprintf(stdout, "brotli %d.%d.%d\n", major, minor, patch);
537 }
538 
PrintHelp(const char * name,BROTLI_BOOL error)539 static void PrintHelp(const char* name, BROTLI_BOOL error) {
540   FILE* media = error ? stderr : stdout;
541   /* String is cut to pieces with length less than 509, to conform C90 spec. */
542   fprintf(media,
543 "Usage: %s [OPTION]... [FILE]...\n",
544           name);
545   fprintf(media,
546 "Options:\n"
547 "  -#                          compression level (0-9)\n"
548 "  -c, --stdout                write on standard output\n"
549 "  -d, --decompress            decompress\n"
550 "  -f, --force                 force output file overwrite\n"
551 "  -h, --help                  display this help and exit\n");
552   fprintf(media,
553 "  -j, --rm                    remove source file(s)\n"
554 "  -k, --keep                  keep source file(s) (default)\n"
555 "  -n, --no-copy-stat          do not copy source file(s) attributes\n"
556 "  -o FILE, --output=FILE      output file (only if 1 input file)\n");
557   fprintf(media,
558 "  -q NUM, --quality=NUM       compression level (%d-%d)\n",
559           BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY);
560   fprintf(media,
561 "  -t, --test                  test compressed file integrity\n"
562 "  -v, --verbose               verbose mode\n");
563   fprintf(media,
564 "  -w NUM, --lgwin=NUM         set LZ77 window size (0, %d-%d)\n"
565 "                              window size = 2**NUM - 16\n"
566 "                              0 lets compressor choose the optimal value\n",
567           BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
568   fprintf(media,
569 "  --large_window=NUM          use incompatible large-window brotli\n"
570 "                              bitstream with window size (0, %d-%d)\n"
571 "                              WARNING: this format is not compatible\n"
572 "                              with brotli RFC 7932 and may not be\n"
573 "                              decodable with regular brotli decoders\n",
574           BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
575   fprintf(media,
576 "  -S SUF, --suffix=SUF        output file suffix (default:'%s')\n",
577           DEFAULT_SUFFIX);
578   fprintf(media,
579 "  -V, --version               display version and exit\n"
580 "  -Z, --best                  use best compression level (11) (default)\n"
581 "Simple options could be coalesced, i.e. '-9kf' is equivalent to '-9 -k -f'.\n"
582 "With no FILE, or when FILE is -, read standard input.\n"
583 "All arguments after '--' are treated as files.\n");
584 }
585 
/* Returns |path| itself, or the placeholder "con" when |path| is NULL, so the
   result can always be printed with "%s". */
static const char* PrintablePath(const char* path) {
  if (path == NULL) return "con";
  return path;
}
589 
OpenInputFile(const char * input_path,FILE ** f)590 static BROTLI_BOOL OpenInputFile(const char* input_path, FILE** f) {
591   *f = NULL;
592   if (!input_path) {
593     *f = fdopen(MAKE_BINARY(STDIN_FILENO), "rb");
594     return BROTLI_TRUE;
595   }
596   *f = fopen(input_path, "rb");
597   if (!*f) {
598     fprintf(stderr, "failed to open input file [%s]: %s\n",
599             PrintablePath(input_path), strerror(errno));
600     return BROTLI_FALSE;
601   }
602   return BROTLI_TRUE;
603 }
604 
/* Opens the output stream for writing in binary mode.
   |output_path| == NULL selects the standard output descriptor, wrapped with
   fdopen(). Otherwise the file is created / truncated with owner read+write
   permission; unless |force| is set, an already existing file is an error
   (O_EXCL).
   On success stores the stream to |*f| and returns BROTLI_TRUE.
   On failure |*f| is NULL, a message is printed to stderr and BROTLI_FALSE is
   returned. */
static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
                                  BROTLI_BOOL force) {
  int fd;
  *f = NULL;
  if (!output_path) {
    *f = fdopen(MAKE_BINARY(STDOUT_FILENO), "wb");
    if (!*f) {
      /* E.g. standard output is closed; do not report success with a NULL
         stream. */
      fprintf(stderr, "failed to open output file [%s]: %s\n",
              PrintablePath(output_path), strerror(errno));
      return BROTLI_FALSE;
    }
    return BROTLI_TRUE;
  }
  fd = open(output_path, O_CREAT | (force ? 0 : O_EXCL) | O_WRONLY | O_TRUNC,
            S_IRUSR | S_IWUSR);
  if (fd < 0) {
    fprintf(stderr, "failed to open output file [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
    return BROTLI_FALSE;
  }
  *f = fdopen(fd, "wb");
  if (!*f) {
    fprintf(stderr, "failed to open output file [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
    /* The descriptor is not owned by any stream; release it here. */
#if defined(_WIN32)
    _close(fd);
#else
    close(fd);
#endif
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
628 
/* Returns the size of the file at |path|, measured by seeking to its end.
   Returns -1 when the file can not be opened, the seek fails, or closing the
   file fails. */
static int64_t FileSize(const char* path) {
  int64_t size = -1;
  FILE* stream = fopen(path, "rb");
  if (!stream) return -1;
  if (fseek(stream, 0L, SEEK_END) == 0) {
    size = ftell(stream);
  }
  /* A failed close invalidates the measurement as well. */
  if (fclose(stream) != 0) return -1;
  return size;
}
645 
/* Copies access / modification times, permission bits, group and owner from
   the input file to the output file. Does nothing when either path is NULL
   or the input file can not be stat()-ed. Failures to set permissions, group
   or owner are reported to stderr; a failure to set times is ignored.
   TODO: this is a "best effort" implementation; honest cross-platform
   fully featured implementation is way too hacky; add more hacks by request. */
static void CopyStat(const char* input_path, const char* output_path) {
  struct stat source_stat;
  struct utimbuf stamps;
  if (!input_path || !output_path) return;
  if (stat(input_path, &source_stat) != 0) return;

  stamps.actime = source_stat.st_atime;
  stamps.modtime = source_stat.st_mtime;
  utime(output_path, &stamps);

  if (chmod(output_path,
            source_stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) != 0) {
    fprintf(stderr, "setting access bits failed for [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
  }
  /* Group and owner are changed separately, so that one may succeed even if
     the other is refused. */
  if (chown(output_path, (uid_t)-1, source_stat.st_gid) != 0) {
    fprintf(stderr, "setting group failed for [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
  }
  if (chown(output_path, source_stat.st_uid, (gid_t)-1) != 0) {
    fprintf(stderr, "setting user failed for [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
  }
}
678 
NextFile(Context * context)679 static BROTLI_BOOL NextFile(Context* context) {
680   const char* arg;
681   size_t arg_len;
682 
683   /* Iterator points to last used arg; increment to search for the next one. */
684   context->iterator++;
685 
686   context->input_file_length = -1;
687 
688   /* No input path; read from console. */
689   if (context->input_count == 0) {
690     if (context->iterator > 1) return BROTLI_FALSE;
691     context->current_input_path = NULL;
692     /* Either write to the specified path, or to console. */
693     context->current_output_path = context->output_path;
694     return BROTLI_TRUE;
695   }
696 
697   /* Skip option arguments. */
698   while (context->iterator == context->not_input_indices[context->ignore]) {
699     context->iterator++;
700     context->ignore++;
701   }
702 
703   /* All args are scanned already. */
704   if (context->iterator >= context->argc) return BROTLI_FALSE;
705 
706   /* Iterator now points to the input file name. */
707   arg = context->argv[context->iterator];
708   arg_len = strlen(arg);
709   /* Read from console. */
710   if (arg_len == 1 && arg[0] == '-') {
711     context->current_input_path = NULL;
712     context->current_output_path = context->output_path;
713     return BROTLI_TRUE;
714   }
715 
716   context->current_input_path = arg;
717   context->input_file_length = FileSize(arg);
718   context->current_output_path = context->output_path;
719 
720   if (context->output_path) return BROTLI_TRUE;
721   if (context->write_to_stdout) return BROTLI_TRUE;
722 
723   strcpy(context->modified_path, arg);
724   context->current_output_path = context->modified_path;
725   /* If output is not specified, input path suffix should match. */
726   if (context->decompress) {
727     size_t suffix_len = strlen(context->suffix);
728     char* name = (char*)FileName(context->modified_path);
729     char* name_suffix;
730     size_t name_len = strlen(name);
731     if (name_len < suffix_len + 1) {
732       fprintf(stderr, "empty output file name for [%s] input file\n",
733               PrintablePath(arg));
734       context->iterator_error = BROTLI_TRUE;
735       return BROTLI_FALSE;
736     }
737     name_suffix = name + name_len - suffix_len;
738     if (strcmp(context->suffix, name_suffix) != 0) {
739       fprintf(stderr, "input file [%s] suffix mismatch\n",
740               PrintablePath(arg));
741       context->iterator_error = BROTLI_TRUE;
742       return BROTLI_FALSE;
743     }
744     name_suffix[0] = 0;
745     return BROTLI_TRUE;
746   } else {
747     strcpy(context->modified_path + arg_len, context->suffix);
748     return BROTLI_TRUE;
749   }
750 }
751 
OpenFiles(Context * context)752 static BROTLI_BOOL OpenFiles(Context* context) {
753   BROTLI_BOOL is_ok = OpenInputFile(context->current_input_path, &context->fin);
754   if (!context->test_integrity && is_ok) {
755     is_ok = OpenOutputFile(
756         context->current_output_path, &context->fout, context->force_overwrite);
757   }
758   return is_ok;
759 }
760 
/* Closes the streams opened for the current file and finalizes the result.
   |success| tells whether processing of the file succeeded.
     - On failure the (named) output file is removed.
     - On success, and if requested, file attributes are copied to the output
       and the source file is removed.
   The source file is removed only when both streams were also closed without
   error: a failed fclose() on the output means that the final flush did not
   complete, so the source must be kept.
   Returns BROTLI_FALSE if closing any of the streams failed. */
static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
  BROTLI_BOOL is_ok = BROTLI_TRUE;
  if (!context->test_integrity && context->fout) {
    if (!success && context->current_output_path) {
      unlink(context->current_output_path);
    }
    if (fclose(context->fout) != 0) {
      /* After a processing failure the close error is not worth reporting. */
      if (success) {
        fprintf(stderr, "fclose failed [%s]: %s\n",
                PrintablePath(context->current_output_path), strerror(errno));
      }
      is_ok = BROTLI_FALSE;
    }

    /* TOCTOU violation, but otherwise it is impossible to set file times. */
    if (success && is_ok && context->copy_stat) {
      CopyStat(context->current_input_path, context->current_output_path);
    }
  }

  if (context->fin) {
    if (fclose(context->fin) != 0) {
      /* Report only the first close failure. */
      if (is_ok) {
        fprintf(stderr, "fclose failed [%s]: %s\n",
                PrintablePath(context->current_input_path), strerror(errno));
      }
      is_ok = BROTLI_FALSE;
    }
  }
  /* Never drop the source unless the result is known to be complete. */
  if (success && is_ok && context->junk_source &&
      context->current_input_path) {
    unlink(context->current_input_path);
  }

  context->fin = NULL;
  context->fout = NULL;

  return is_ok;
}
799 
/* Size, in bytes, of each file I/O buffer: 2^19 = 512 KiB. */
static const size_t kFileBufferSize = (size_t)1 << 19;
801 
/* Resets stream cursors and byte counters before processing a file: no input
   is pending, the whole output buffer is free, both totals are zero. */
static void InitializeBuffers(Context* context) {
  /* Input side: nothing has been read yet. */
  context->next_in = NULL;
  context->available_in = 0;
  /* Output side: the complete buffer is available. */
  context->next_out = context->output;
  context->available_out = kFileBufferSize;
  /* Reporting counters. */
  context->total_out = 0;
  context->total_in = 0;
}
810 
811 /* This method might give the false-negative result.
812    However, after an empty / incomplete read it should tell the truth. */
HasMoreInput(Context * context)813 static BROTLI_BOOL HasMoreInput(Context* context) {
814   return feof(context->fin) ? BROTLI_FALSE : BROTLI_TRUE;
815 }
816 
ProvideInput(Context * context)817 static BROTLI_BOOL ProvideInput(Context* context) {
818   context->available_in =
819       fread(context->input, 1, kFileBufferSize, context->fin);
820   context->total_in += context->available_in;
821   context->next_in = context->input;
822   if (ferror(context->fin)) {
823     fprintf(stderr, "failed to read input [%s]: %s\n",
824             PrintablePath(context->current_input_path), strerror(errno));
825     return BROTLI_FALSE;
826   }
827   return BROTLI_TRUE;
828 }
829 
830 /* Internal: should be used only in Provide-/Flush-Output. */
WriteOutput(Context * context)831 static BROTLI_BOOL WriteOutput(Context* context) {
832   size_t out_size = (size_t)(context->next_out - context->output);
833   context->total_out += out_size;
834   if (out_size == 0) return BROTLI_TRUE;
835   if (context->test_integrity) return BROTLI_TRUE;
836 
837   fwrite(context->output, 1, out_size, context->fout);
838   if (ferror(context->fout)) {
839     fprintf(stderr, "failed to write output [%s]: %s\n",
840             PrintablePath(context->current_output_path), strerror(errno));
841     return BROTLI_FALSE;
842   }
843   return BROTLI_TRUE;
844 }
845 
ProvideOutput(Context * context)846 static BROTLI_BOOL ProvideOutput(Context* context) {
847   if (!WriteOutput(context)) return BROTLI_FALSE;
848   context->available_out = kFileBufferSize;
849   context->next_out = context->output;
850   return BROTLI_TRUE;
851 }
852 
FlushOutput(Context * context)853 static BROTLI_BOOL FlushOutput(Context* context) {
854   if (!WriteOutput(context)) return BROTLI_FALSE;
855   context->available_out = 0;
856   return BROTLI_TRUE;
857 }
858 
/* Prints a byte count to stderr in human-readable form: a plain integer for
   values below 1 KiB, otherwise a 3-decimal figure in KiB, MiB or GiB
   (binary units; the largest unit not exceeding the value is chosen). */
static void PrintBytes(size_t value) {
  const double amount = (double)value;
  if (value >= 1073741824) {
    fprintf(stderr, "%0.3f GiB", amount / 1073741824.0);
  } else if (value >= 1048576) {
    fprintf(stderr, "%0.3f MiB", amount / 1048576.0);
  } else if (value >= 1024) {
    fprintf(stderr, "%0.3f KiB", amount / 1024.0);
  } else {
    /* Below 1024, so the narrowing cast cannot lose information. */
    fprintf(stderr, "%d B", (int)value);
  }
}
870 
/* Prints "[<input path>]: <bytes in> -> <bytes out>" to stderr, without a
   trailing newline. */
static void PrintFileProcessingProgress(Context* context) {
  fprintf(stderr, "[%s]: ", PrintablePath(context->current_input_path));
  PrintBytes(context->total_in);
  fputs(" -> ", stderr);
  PrintBytes(context->total_out);
}
877 
DecompressFile(Context * context,BrotliDecoderState * s)878 static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
879   BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
880   InitializeBuffers(context);
881   for (;;) {
882     if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
883       if (!HasMoreInput(context)) {
884         fprintf(stderr, "corrupt input [%s]\n",
885                 PrintablePath(context->current_input_path));
886         return BROTLI_FALSE;
887       }
888       if (!ProvideInput(context)) return BROTLI_FALSE;
889     } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
890       if (!ProvideOutput(context)) return BROTLI_FALSE;
891     } else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
892       if (!FlushOutput(context)) return BROTLI_FALSE;
893       int has_more_input =
894           (context->available_in != 0) || (fgetc(context->fin) != EOF);
895       if (has_more_input) {
896         fprintf(stderr, "corrupt input [%s]\n",
897                 PrintablePath(context->current_input_path));
898         return BROTLI_FALSE;
899       }
900       if (context->verbosity > 0) {
901         fprintf(stderr, "Decompressed ");
902         PrintFileProcessingProgress(context);
903         fprintf(stderr, "\n");
904       }
905       return BROTLI_TRUE;
906     } else {
907       fprintf(stderr, "corrupt input [%s]\n",
908               PrintablePath(context->current_input_path));
909       return BROTLI_FALSE;
910     }
911 
912     result = BrotliDecoderDecompressStream(s, &context->available_in,
913         &context->next_in, &context->available_out, &context->next_out, 0);
914   }
915 }
916 
DecompressFiles(Context * context)917 static BROTLI_BOOL DecompressFiles(Context* context) {
918   while (NextFile(context)) {
919     BROTLI_BOOL is_ok = BROTLI_TRUE;
920     BrotliDecoderState* s = BrotliDecoderCreateInstance(NULL, NULL, NULL);
921     if (!s) {
922       fprintf(stderr, "out of memory\n");
923       return BROTLI_FALSE;
924     }
925     /* This allows decoding "large-window" streams. Though it creates
926        fragmentation (new builds decode streams that old builds don't),
927        it is better from used experience perspective. */
928     BrotliDecoderSetParameter(s, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
929     is_ok = OpenFiles(context);
930     if (is_ok && !context->current_input_path &&
931         !context->force_overwrite && isatty(STDIN_FILENO)) {
932       fprintf(stderr, "Use -h help. Use -f to force input from a terminal.\n");
933       is_ok = BROTLI_FALSE;
934     }
935     if (is_ok) is_ok = DecompressFile(context, s);
936     BrotliDecoderDestroyInstance(s);
937     if (!CloseFiles(context, is_ok)) is_ok = BROTLI_FALSE;
938     if (!is_ok) return BROTLI_FALSE;
939   }
940   return BROTLI_TRUE;
941 }
942 
CompressFile(Context * context,BrotliEncoderState * s)943 static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
944   BROTLI_BOOL is_eof = BROTLI_FALSE;
945   InitializeBuffers(context);
946   for (;;) {
947     if (context->available_in == 0 && !is_eof) {
948       if (!ProvideInput(context)) return BROTLI_FALSE;
949       is_eof = !HasMoreInput(context);
950     }
951 
952     if (!BrotliEncoderCompressStream(s,
953         is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
954         &context->available_in, &context->next_in,
955         &context->available_out, &context->next_out, NULL)) {
956       /* Should detect OOM? */
957       fprintf(stderr, "failed to compress data [%s]\n",
958               PrintablePath(context->current_input_path));
959       return BROTLI_FALSE;
960     }
961 
962     if (context->available_out == 0) {
963       if (!ProvideOutput(context)) return BROTLI_FALSE;
964     }
965 
966     if (BrotliEncoderIsFinished(s)) {
967       if (!FlushOutput(context)) return BROTLI_FALSE;
968       if (context->verbosity > 0) {
969         fprintf(stderr, "Compressed ");
970         PrintFileProcessingProgress(context);
971         fprintf(stderr, "\n");
972       }
973       return BROTLI_TRUE;
974     }
975   }
976 }
977 
CompressFiles(Context * context)978 static BROTLI_BOOL CompressFiles(Context* context) {
979   while (NextFile(context)) {
980     BROTLI_BOOL is_ok = BROTLI_TRUE;
981     BrotliEncoderState* s = BrotliEncoderCreateInstance(NULL, NULL, NULL);
982     if (!s) {
983       fprintf(stderr, "out of memory\n");
984       return BROTLI_FALSE;
985     }
986     BrotliEncoderSetParameter(s,
987         BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
988     if (context->lgwin > 0) {
989       /* Specified by user. */
990       /* Do not enable "large-window" extension, if not required. */
991       if (context->lgwin > BROTLI_MAX_WINDOW_BITS) {
992         BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, 1u);
993       }
994       BrotliEncoderSetParameter(s,
995           BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
996     } else {
997       /* 0, or not specified by user; could be chosen by compressor. */
998       uint32_t lgwin = DEFAULT_LGWIN;
999       /* Use file size to limit lgwin. */
1000       if (context->input_file_length >= 0) {
1001         lgwin = BROTLI_MIN_WINDOW_BITS;
1002         while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) <
1003                (uint64_t)context->input_file_length) {
1004           lgwin++;
1005           if (lgwin == BROTLI_MAX_WINDOW_BITS) break;
1006         }
1007       }
1008       BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, lgwin);
1009     }
1010     if (context->input_file_length > 0) {
1011       uint32_t size_hint = context->input_file_length < (1 << 30) ?
1012           (uint32_t)context->input_file_length : (1u << 30);
1013       BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
1014     }
1015     is_ok = OpenFiles(context);
1016     if (is_ok && !context->current_output_path &&
1017         !context->force_overwrite && isatty(STDOUT_FILENO)) {
1018       fprintf(stderr, "Use -h help. Use -f to force output to a terminal.\n");
1019       is_ok = BROTLI_FALSE;
1020     }
1021     if (is_ok) is_ok = CompressFile(context, s);
1022     BrotliEncoderDestroyInstance(s);
1023     if (!CloseFiles(context, is_ok)) is_ok = BROTLI_FALSE;
1024     if (!is_ok) return BROTLI_FALSE;
1025   }
1026   return BROTLI_TRUE;
1027 }
1028 
main(int argc,char ** argv)1029 int main(int argc, char** argv) {
1030   Command command;
1031   Context context;
1032   BROTLI_BOOL is_ok = BROTLI_TRUE;
1033   int i;
1034 
1035   context.quality = 11;
1036   context.lgwin = -1;
1037   context.verbosity = 0;
1038   context.force_overwrite = BROTLI_FALSE;
1039   context.junk_source = BROTLI_FALSE;
1040   context.copy_stat = BROTLI_TRUE;
1041   context.test_integrity = BROTLI_FALSE;
1042   context.write_to_stdout = BROTLI_FALSE;
1043   context.decompress = BROTLI_FALSE;
1044   context.large_window = BROTLI_FALSE;
1045   context.output_path = NULL;
1046   context.suffix = DEFAULT_SUFFIX;
1047   for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
1048   context.longest_path_len = 1;
1049   context.input_count = 0;
1050 
1051   context.argc = argc;
1052   context.argv = argv;
1053   context.modified_path = NULL;
1054   context.iterator = 0;
1055   context.ignore = 0;
1056   context.iterator_error = BROTLI_FALSE;
1057   context.buffer = NULL;
1058   context.current_input_path = NULL;
1059   context.current_output_path = NULL;
1060   context.fin = NULL;
1061   context.fout = NULL;
1062 
1063   command = ParseParams(&context);
1064 
1065   if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
1066       command == COMMAND_TEST_INTEGRITY) {
1067     if (is_ok) {
1068       size_t modified_path_len =
1069           context.longest_path_len + strlen(context.suffix) + 1;
1070       context.modified_path = (char*)malloc(modified_path_len);
1071       context.buffer = (uint8_t*)malloc(kFileBufferSize * 2);
1072       if (!context.modified_path || !context.buffer) {
1073         fprintf(stderr, "out of memory\n");
1074         is_ok = BROTLI_FALSE;
1075       } else {
1076         context.input = context.buffer;
1077         context.output = context.buffer + kFileBufferSize;
1078       }
1079     }
1080   }
1081 
1082   if (!is_ok) command = COMMAND_NOOP;
1083 
1084   switch (command) {
1085     case COMMAND_NOOP:
1086       break;
1087 
1088     case COMMAND_VERSION:
1089       PrintVersion();
1090       break;
1091 
1092     case COMMAND_COMPRESS:
1093       is_ok = CompressFiles(&context);
1094       break;
1095 
1096     case COMMAND_DECOMPRESS:
1097     case COMMAND_TEST_INTEGRITY:
1098       is_ok = DecompressFiles(&context);
1099       break;
1100 
1101     case COMMAND_HELP:
1102     case COMMAND_INVALID:
1103     default:
1104       is_ok = (command == COMMAND_HELP);
1105       PrintHelp(FileName(argv[0]), is_ok);
1106       break;
1107   }
1108 
1109   if (context.iterator_error) is_ok = BROTLI_FALSE;
1110 
1111   free(context.modified_path);
1112   free(context.buffer);
1113 
1114   if (!is_ok) exit(1);
1115   return 0;
1116 }
1117