1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
11*01826a49SYabin Cui #include <string.h>
12*01826a49SYabin Cui #include <stdio.h>
13*01826a49SYabin Cui #include <stdlib.h>
14*01826a49SYabin Cui
15*01826a49SYabin Cui #define ZSTD_STATIC_LINKING_ONLY
16*01826a49SYabin Cui #include "util.h"
17*01826a49SYabin Cui #include "zstd.h"
18*01826a49SYabin Cui
/*
 * CHECK(cond, "fmt", args...)
 *
 * Aborts the program when `cond` is false: prints the source location and the
 * text of the failed condition to stderr, followed by the caller's
 * printf-style message and a newline, then calls exit(1).
 *
 * The empty literal placed in front of __VA_ARGS__ is concatenated with the
 * caller's format string, so the message must be a string literal.
 */
#define CHECK(cond, ...)                                                       \
    do {                                                                       \
        if (!(cond)) {                                                         \
            fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__,    \
                    #cond);                                                    \
            fprintf(stderr, "" __VA_ARGS__);                                   \
            fputc('\n', stderr);                                               \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
28*01826a49SYabin Cui
/*
 * Prints the command-line help text to stderr.
 *
 * program: name to show in the USAGE line (main passes argv[0]).
 */
static void usage(char const *program) {
    fprintf(stderr, "USAGE: %s FILE.zst PREFIX\n", program);
    /* The remaining lines carry no conversions, so they are written verbatim. */
    fputs("FILE.zst: A zstd compressed file with multiple frames\n", stderr);
    fputs("PREFIX: The output prefix. Uncompressed files will be created "
          "named ${PREFIX}0 ${PREFIX}1...\n\n",
          stderr);
    fputs("This program takes concatenated zstd frames and decompresses them "
          "into individual files.\n",
          stderr);
    fputs("E.g. files created with a command like: zstd -r directory -o "
          "file.zst\n",
          stderr);
}
39*01826a49SYabin Cui
/*
 * In-memory image of a zstd file together with the frame statistics that
 * readFile() gathers while scanning it.
 */
typedef struct {
    char  *data;          /* malloc'd buffer holding the file contents; main() frees it */
    size_t size;          /* number of bytes in `data` (the file size) */
    size_t frames;        /* how many frames were counted in `data` */
    size_t maxFrameSize;  /* largest compressed frame size seen, in bytes */
} ZstdFrames;
46*01826a49SYabin Cui
readFile(char const * fileName)47*01826a49SYabin Cui static ZstdFrames readFile(char const *fileName) {
48*01826a49SYabin Cui U64 const fileSize = UTIL_getFileSize(fileName);
49*01826a49SYabin Cui CHECK(fileSize != UTIL_FILESIZE_UNKNOWN, "Unknown file size!");
50*01826a49SYabin Cui
51*01826a49SYabin Cui char *const data = (char *)malloc(fileSize);
52*01826a49SYabin Cui CHECK(data != NULL, "Allocation failed");
53*01826a49SYabin Cui
54*01826a49SYabin Cui FILE *file = fopen(fileName, "rb");
55*01826a49SYabin Cui CHECK(file != NULL, "fopen failed");
56*01826a49SYabin Cui
57*01826a49SYabin Cui size_t const readSize = fread(data, 1, fileSize, file);
58*01826a49SYabin Cui CHECK(readSize == fileSize, "fread failed");
59*01826a49SYabin Cui
60*01826a49SYabin Cui fclose(file);
61*01826a49SYabin Cui ZstdFrames frames;
62*01826a49SYabin Cui frames.data = (char *)data;
63*01826a49SYabin Cui frames.size = fileSize;
64*01826a49SYabin Cui frames.frames = 0;
65*01826a49SYabin Cui
66*01826a49SYabin Cui size_t index;
67*01826a49SYabin Cui size_t maxFrameSize = 0;
68*01826a49SYabin Cui for (index = 0; index < fileSize;) {
69*01826a49SYabin Cui size_t const frameSize =
70*01826a49SYabin Cui ZSTD_findFrameCompressedSize(data + index, fileSize - index);
71*01826a49SYabin Cui CHECK(!ZSTD_isError(frameSize), "Bad zstd frame: %s",
72*01826a49SYabin Cui ZSTD_getErrorName(frameSize));
73*01826a49SYabin Cui if (frameSize > maxFrameSize)
74*01826a49SYabin Cui maxFrameSize = frameSize;
75*01826a49SYabin Cui frames.frames += 1;
76*01826a49SYabin Cui index += frameSize;
77*01826a49SYabin Cui }
78*01826a49SYabin Cui CHECK(index == fileSize, "Zstd file corrupt!");
79*01826a49SYabin Cui frames.maxFrameSize = maxFrameSize;
80*01826a49SYabin Cui
81*01826a49SYabin Cui return frames;
82*01826a49SYabin Cui }
83*01826a49SYabin Cui
/*
 * Returns how many characters `numFrames` occupies when printed with "%u"
 * (after conversion to unsigned). main() uses this as the zero-padded field
 * width of the numeric suffix in output file names.
 */
static int computePadding(size_t numFrames) {
    unsigned const value = (unsigned)numFrames;
    /* With a NULL buffer and size 0, snprintf only reports the length. */
    int const width = snprintf(NULL, 0, "%u", value);
    return width;
}
87*01826a49SYabin Cui
main(int argc,char ** argv)88*01826a49SYabin Cui int main(int argc, char **argv) {
89*01826a49SYabin Cui if (argc != 3) {
90*01826a49SYabin Cui usage(argv[0]);
91*01826a49SYabin Cui exit(1);
92*01826a49SYabin Cui }
93*01826a49SYabin Cui char const *const zstdFile = argv[1];
94*01826a49SYabin Cui char const *const prefix = argv[2];
95*01826a49SYabin Cui
96*01826a49SYabin Cui ZstdFrames frames = readFile(zstdFile);
97*01826a49SYabin Cui
98*01826a49SYabin Cui if (frames.frames <= 1) {
99*01826a49SYabin Cui fprintf(
100*01826a49SYabin Cui stderr,
101*01826a49SYabin Cui "%s only has %u zstd frame. Simply use `zstd -d` to decompress it.\n",
102*01826a49SYabin Cui zstdFile, (unsigned)frames.frames);
103*01826a49SYabin Cui exit(1);
104*01826a49SYabin Cui }
105*01826a49SYabin Cui
106*01826a49SYabin Cui int const padding = computePadding(frames.frames - 1);
107*01826a49SYabin Cui
108*01826a49SYabin Cui size_t const outFileNameSize = strlen(prefix) + padding + 1;
109*01826a49SYabin Cui char* outFileName = malloc(outFileNameSize);
110*01826a49SYabin Cui CHECK(outFileName != NULL, "Allocation failure");
111*01826a49SYabin Cui
112*01826a49SYabin Cui size_t const bufferSize = 128 * 1024;
113*01826a49SYabin Cui void *buffer = malloc(bufferSize);
114*01826a49SYabin Cui CHECK(buffer != NULL, "Allocation failure");
115*01826a49SYabin Cui
116*01826a49SYabin Cui ZSTD_DCtx* dctx = ZSTD_createDCtx();
117*01826a49SYabin Cui CHECK(dctx != NULL, "Allocation failure");
118*01826a49SYabin Cui
119*01826a49SYabin Cui fprintf(stderr, "Recovering %u files...\n", (unsigned)frames.frames);
120*01826a49SYabin Cui
121*01826a49SYabin Cui size_t index;
122*01826a49SYabin Cui size_t frame = 0;
123*01826a49SYabin Cui for (index = 0; index < frames.size; ++frame) {
124*01826a49SYabin Cui size_t const frameSize =
125*01826a49SYabin Cui ZSTD_findFrameCompressedSize(frames.data + index, frames.size - index);
126*01826a49SYabin Cui
127*01826a49SYabin Cui int const ret = snprintf(outFileName, outFileNameSize, "%s%0*u", prefix, padding, (unsigned)frame);
128*01826a49SYabin Cui CHECK(ret >= 0 && (size_t)ret <= outFileNameSize, "snprintf failed!");
129*01826a49SYabin Cui
130*01826a49SYabin Cui FILE* outFile = fopen(outFileName, "wb");
131*01826a49SYabin Cui CHECK(outFile != NULL, "fopen failed");
132*01826a49SYabin Cui
133*01826a49SYabin Cui ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
134*01826a49SYabin Cui ZSTD_inBuffer in = {frames.data + index, frameSize, 0};
135*01826a49SYabin Cui while (in.pos < in.size) {
136*01826a49SYabin Cui ZSTD_outBuffer out = {buffer, bufferSize, 0};
137*01826a49SYabin Cui CHECK(!ZSTD_isError(ZSTD_decompressStream(dctx, &out, &in)), "decompression failed");
138*01826a49SYabin Cui size_t const writeSize = fwrite(out.dst, 1, out.pos, outFile);
139*01826a49SYabin Cui CHECK(writeSize == out.pos, "fwrite failed");
140*01826a49SYabin Cui }
141*01826a49SYabin Cui fclose(outFile);
142*01826a49SYabin Cui fprintf(stderr, "Recovered %s\n", outFileName);
143*01826a49SYabin Cui index += frameSize;
144*01826a49SYabin Cui }
145*01826a49SYabin Cui fprintf(stderr, "Complete\n");
146*01826a49SYabin Cui
147*01826a49SYabin Cui free(outFileName);
148*01826a49SYabin Cui ZSTD_freeDCtx(dctx);
149*01826a49SYabin Cui free(buffer);
150*01826a49SYabin Cui free(frames.data);
151*01826a49SYabin Cui return 0;
152*01826a49SYabin Cui }
153