xref: /aosp_15_r20/external/zstd/tests/decodecorpus.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui #include <limits.h>
12*01826a49SYabin Cui #include <math.h>
13*01826a49SYabin Cui #include <stddef.h>
14*01826a49SYabin Cui #include <stdio.h>
15*01826a49SYabin Cui #include <stdlib.h>
16*01826a49SYabin Cui #include <string.h>
17*01826a49SYabin Cui #include <time.h>  /* time(), for seed random initialization */
18*01826a49SYabin Cui 
19*01826a49SYabin Cui #include "util.h"
20*01826a49SYabin Cui #include "timefn.h"   /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */
21*01826a49SYabin Cui #include "zstd.h"
22*01826a49SYabin Cui #include "zstd_internal.h"
23*01826a49SYabin Cui #include "mem.h"
24*01826a49SYabin Cui #define ZDICT_STATIC_LINKING_ONLY
25*01826a49SYabin Cui #include "zdict.h"
26*01826a49SYabin Cui 
27*01826a49SYabin Cui /* Direct access to internal compression functions is required */
28*01826a49SYabin Cui #include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */
29*01826a49SYabin Cui #include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */
30*01826a49SYabin Cui 
31*01826a49SYabin Cui #define XXH_STATIC_LINKING_ONLY
32*01826a49SYabin Cui #include "xxhash.h"     /* XXH64 */
33*01826a49SYabin Cui 
34*01826a49SYabin Cui #if !(defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
35*01826a49SYabin Cui # define inline  /* disable */
36*01826a49SYabin Cui #endif
37*01826a49SYabin Cui 
38*01826a49SYabin Cui /*-************************************
39*01826a49SYabin Cui *  DISPLAY Macros
40*01826a49SYabin Cui **************************************/
41*01826a49SYabin Cui #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
42*01826a49SYabin Cui #define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
43*01826a49SYabin Cui static U32 g_displayLevel = 2;
44*01826a49SYabin Cui 
45*01826a49SYabin Cui #define DISPLAYUPDATE(...)                                                     \
46*01826a49SYabin Cui     do {                                                                       \
47*01826a49SYabin Cui         if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) ||           \
48*01826a49SYabin Cui             (g_displayLevel >= 4)) {                                           \
49*01826a49SYabin Cui             g_displayClock = UTIL_getTime();                                   \
50*01826a49SYabin Cui             DISPLAY(__VA_ARGS__);                                              \
51*01826a49SYabin Cui             if (g_displayLevel >= 4) fflush(stderr);                           \
52*01826a49SYabin Cui         }                                                                      \
53*01826a49SYabin Cui     } while (0)
54*01826a49SYabin Cui 
55*01826a49SYabin Cui static const U64 g_refreshRate = SEC_TO_MICRO / 6;
56*01826a49SYabin Cui static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
57*01826a49SYabin Cui 
/* CHECKERR : abort the program (exit code 1) with a message when `code` is a
 * zstd error code.
 * `code` is evaluated exactly once, so it is safe to pass an expression with
 * side effects (e.g. a function call) directly. */
#define CHECKERR(code)                                                         \
    do {                                                                       \
        size_t const checkerr_code_ = (code);                                  \
        if (ZSTD_isError(checkerr_code_)) {                                    \
            DISPLAY("Error occurred while generating data: %s\n",              \
                    ZSTD_getErrorName(checkerr_code_));                        \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
66*01826a49SYabin Cui 
67*01826a49SYabin Cui 
68*01826a49SYabin Cui /*-*******************************************************
69*01826a49SYabin Cui *  Random function
70*01826a49SYabin Cui *********************************************************/
/* RAND() :
 * Advance the generator state stored in `*src` and return the next
 * pseudo-random 32-bit value.
 * The state is updated as rotl32(state * prime1 + prime2, 13), and the
 * returned value is that new state rotated left by a further 27 bits. */
static U32 RAND(U32* src)
{
    U32 const multiplier = 2654435761U;
    U32 const increment  = 2246822519U;
    U32 state = (*src * multiplier) + increment;

    state = (state << 13) | (state >> (32 - 13));
    *src = state;
    return (state << 27) | (state >> (32 - 27));
}
84*01826a49SYabin Cui 
85*01826a49SYabin Cui #define DISTSIZE (8192)
86*01826a49SYabin Cui 
87*01826a49SYabin Cui /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
RAND_bufferMaxSymb(U32 * seed,void * ptr,size_t size,int maxSymb)88*01826a49SYabin Cui static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
89*01826a49SYabin Cui {
90*01826a49SYabin Cui     size_t i;
91*01826a49SYabin Cui     BYTE* op = ptr;
92*01826a49SYabin Cui 
93*01826a49SYabin Cui     for (i = 0; i < size; i++) {
94*01826a49SYabin Cui         op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
95*01826a49SYabin Cui     }
96*01826a49SYabin Cui }
97*01826a49SYabin Cui 
98*01826a49SYabin Cui /* Write `size` random bytes into `ptr` */
RAND_buffer(U32 * seed,void * ptr,size_t size)99*01826a49SYabin Cui static void RAND_buffer(U32* seed, void* ptr, size_t size)
100*01826a49SYabin Cui {
101*01826a49SYabin Cui     size_t i;
102*01826a49SYabin Cui     BYTE* op = ptr;
103*01826a49SYabin Cui 
104*01826a49SYabin Cui     for (i = 0; i + 4 <= size; i += 4) {
105*01826a49SYabin Cui         MEM_writeLE32(op + i, RAND(seed));
106*01826a49SYabin Cui     }
107*01826a49SYabin Cui     for (; i < size; i++) {
108*01826a49SYabin Cui         op[i] = RAND(seed) & 0xff;
109*01826a49SYabin Cui     }
110*01826a49SYabin Cui }
111*01826a49SYabin Cui 
112*01826a49SYabin Cui /* Write `size` bytes into `ptr` following the distribution `dist` */
RAND_bufferDist(U32 * seed,BYTE * dist,void * ptr,size_t size)113*01826a49SYabin Cui static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
114*01826a49SYabin Cui {
115*01826a49SYabin Cui     size_t i;
116*01826a49SYabin Cui     BYTE* op = ptr;
117*01826a49SYabin Cui 
118*01826a49SYabin Cui     for (i = 0; i < size; i++) {
119*01826a49SYabin Cui         op[i] = dist[RAND(seed) % DISTSIZE];
120*01826a49SYabin Cui     }
121*01826a49SYabin Cui }
122*01826a49SYabin Cui 
123*01826a49SYabin Cui /* Generate a random distribution where the frequency of each symbol follows a
124*01826a49SYabin Cui  * geometric distribution defined by `weight`
125*01826a49SYabin Cui  * `dist` should have size at least `DISTSIZE` */
RAND_genDist(U32 * seed,BYTE * dist,double weight)126*01826a49SYabin Cui static void RAND_genDist(U32* seed, BYTE* dist, double weight)
127*01826a49SYabin Cui {
128*01826a49SYabin Cui     size_t i = 0;
129*01826a49SYabin Cui     size_t statesLeft = DISTSIZE;
130*01826a49SYabin Cui     BYTE symb = (BYTE) (RAND(seed) % 256);
131*01826a49SYabin Cui     BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
132*01826a49SYabin Cui 
133*01826a49SYabin Cui     while (i < DISTSIZE) {
134*01826a49SYabin Cui         size_t states = ((size_t)(weight * (double)statesLeft)) + 1;
135*01826a49SYabin Cui         size_t j;
136*01826a49SYabin Cui         for (j = 0; j < states && i < DISTSIZE; j++, i++) {
137*01826a49SYabin Cui             dist[i] = symb;
138*01826a49SYabin Cui         }
139*01826a49SYabin Cui 
140*01826a49SYabin Cui         symb += step;
141*01826a49SYabin Cui         statesLeft -= states;
142*01826a49SYabin Cui     }
143*01826a49SYabin Cui }
144*01826a49SYabin Cui 
145*01826a49SYabin Cui /* Generates a random number in the range [min, max) */
RAND_range(U32 * seed,U32 min,U32 max)146*01826a49SYabin Cui static inline U32 RAND_range(U32* seed, U32 min, U32 max)
147*01826a49SYabin Cui {
148*01826a49SYabin Cui     return (RAND(seed) % (max-min)) + min;
149*01826a49SYabin Cui }
150*01826a49SYabin Cui 
/* ROUND : round a non-negative floating point value to the nearest U32.
 * The argument is parenthesized so that expressions with low-precedence
 * operators (e.g. `c ? a : b`) are rounded as a whole. */
#define ROUND(x) ((U32)((x) + 0.5))
152*01826a49SYabin Cui 
153*01826a49SYabin Cui /* Generates a random number in an exponential distribution with mean `mean` */
RAND_exp(U32 * seed,double mean)154*01826a49SYabin Cui static double RAND_exp(U32* seed, double mean)
155*01826a49SYabin Cui {
156*01826a49SYabin Cui     double const u = RAND(seed) / (double) UINT_MAX;
157*01826a49SYabin Cui     return log(1-u) * (-mean);
158*01826a49SYabin Cui }
159*01826a49SYabin Cui 
160*01826a49SYabin Cui /*-*******************************************************
161*01826a49SYabin Cui *  Constants and Structs
162*01826a49SYabin Cui *********************************************************/
163*01826a49SYabin Cui const char* BLOCK_TYPES[] = {"raw", "rle", "compressed"};
164*01826a49SYabin Cui 
165*01826a49SYabin Cui #define MAX_DECOMPRESSED_SIZE_LOG 20
166*01826a49SYabin Cui #define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
167*01826a49SYabin Cui 
168*01826a49SYabin Cui #define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
169*01826a49SYabin Cui 
170*01826a49SYabin Cui #define MIN_SEQ_LEN (3)
171*01826a49SYabin Cui #define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
172*01826a49SYabin Cui 
173*01826a49SYabin Cui #ifndef MAX_PATH
174*01826a49SYabin Cui     #ifdef PATH_MAX
175*01826a49SYabin Cui         #define MAX_PATH PATH_MAX
176*01826a49SYabin Cui     #else
177*01826a49SYabin Cui         #define MAX_PATH 256
178*01826a49SYabin Cui     #endif
179*01826a49SYabin Cui #endif
180*01826a49SYabin Cui 
181*01826a49SYabin Cui BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
182*01826a49SYabin Cui BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
183*01826a49SYabin Cui BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];
184*01826a49SYabin Cui 
185*01826a49SYabin Cui seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
186*01826a49SYabin Cui BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
187*01826a49SYabin Cui BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
188*01826a49SYabin Cui BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
189*01826a49SYabin Cui BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
190*01826a49SYabin Cui 
191*01826a49SYabin Cui U64 WKSP[HUF_WORKSPACE_SIZE_U64];
192*01826a49SYabin Cui 
/* Sizes selected for a generated frame */
typedef struct {
    /* 0 means unknown (unless contentSize == windowSize == 0) */
    size_t contentSize;
    /* contentSize >= windowSize means single segment */
    unsigned windowSize;
} frameHeader_t;
197*01826a49SYabin Cui 
198*01826a49SYabin Cui /* For repeat modes */
199*01826a49SYabin Cui typedef struct {
200*01826a49SYabin Cui     U32 rep[ZSTD_REP_NUM];
201*01826a49SYabin Cui 
202*01826a49SYabin Cui     int hufInit;
203*01826a49SYabin Cui     /* the distribution used in the previous block for repeat mode */
204*01826a49SYabin Cui     BYTE hufDist[DISTSIZE];
205*01826a49SYabin Cui     HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)];
206*01826a49SYabin Cui 
207*01826a49SYabin Cui     int fseInit;
208*01826a49SYabin Cui     FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
209*01826a49SYabin Cui     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
210*01826a49SYabin Cui     FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
211*01826a49SYabin Cui 
212*01826a49SYabin Cui     /* Symbols that were present in the previous distribution, for use with
213*01826a49SYabin Cui      * set_repeat */
214*01826a49SYabin Cui     BYTE litlengthSymbolSet[36];
215*01826a49SYabin Cui     BYTE offsetSymbolSet[29];
216*01826a49SYabin Cui     BYTE matchlengthSymbolSet[53];
217*01826a49SYabin Cui } cblockStats_t;
218*01826a49SYabin Cui 
219*01826a49SYabin Cui typedef struct {
220*01826a49SYabin Cui     void* data;
221*01826a49SYabin Cui     void* dataStart;
222*01826a49SYabin Cui     void* dataEnd;
223*01826a49SYabin Cui 
224*01826a49SYabin Cui     void* src;
225*01826a49SYabin Cui     void* srcStart;
226*01826a49SYabin Cui     void* srcEnd;
227*01826a49SYabin Cui 
228*01826a49SYabin Cui     frameHeader_t header;
229*01826a49SYabin Cui 
230*01826a49SYabin Cui     cblockStats_t stats;
231*01826a49SYabin Cui     cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
232*01826a49SYabin Cui } frame_t;
233*01826a49SYabin Cui 
234*01826a49SYabin Cui typedef struct {
235*01826a49SYabin Cui     int useDict;
236*01826a49SYabin Cui     U32 dictID;
237*01826a49SYabin Cui     size_t dictContentSize;
238*01826a49SYabin Cui     BYTE* dictContent;
239*01826a49SYabin Cui } dictInfo;
240*01826a49SYabin Cui 
/* What the generator produces */
typedef enum {
  /* generate frames */
  gt_frame = 0,
  /* generate compressed blocks without block/frame headers */
  gt_block,
} genType_e;
245*01826a49SYabin Cui 
/* MIN : smaller of two values. Both arguments may be evaluated twice,
 * so they must be free of side effects. */
#ifndef MIN
    #define MIN(a, b) (((b) > (a)) ? (a) : (b))
#endif
249*01826a49SYabin Cui 
250*01826a49SYabin Cui /*-*******************************************************
251*01826a49SYabin Cui *  Global variables (set from command line)
252*01826a49SYabin Cui *********************************************************/
253*01826a49SYabin Cui U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG;  /* <= 20 */
254*01826a49SYabin Cui U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX;                       /* <= 128 KB */
255*01826a49SYabin Cui 
256*01826a49SYabin Cui /*-*******************************************************
257*01826a49SYabin Cui *  Generator Functions
258*01826a49SYabin Cui *********************************************************/
259*01826a49SYabin Cui 
260*01826a49SYabin Cui struct {
261*01826a49SYabin Cui     int contentSize; /* force the content size to be present */
262*01826a49SYabin Cui } opts; /* advanced options on generation */
263*01826a49SYabin Cui 
264*01826a49SYabin Cui /* Generate and write a random frame header */
writeFrameHeader(U32 * seed,frame_t * frame,dictInfo info)265*01826a49SYabin Cui static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
266*01826a49SYabin Cui {
267*01826a49SYabin Cui     BYTE* const op = frame->data;
268*01826a49SYabin Cui     size_t pos = 0;
269*01826a49SYabin Cui     frameHeader_t fh;
270*01826a49SYabin Cui 
271*01826a49SYabin Cui     BYTE windowByte = 0;
272*01826a49SYabin Cui 
273*01826a49SYabin Cui     int singleSegment = 0;
274*01826a49SYabin Cui     int contentSizeFlag = 0;
275*01826a49SYabin Cui     int fcsCode = 0;
276*01826a49SYabin Cui 
277*01826a49SYabin Cui     memset(&fh, 0, sizeof(fh));
278*01826a49SYabin Cui 
279*01826a49SYabin Cui     /* generate window size */
280*01826a49SYabin Cui     {
281*01826a49SYabin Cui         /* Follow window algorithm from specification */
282*01826a49SYabin Cui         int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
283*01826a49SYabin Cui         int const mantissa = RAND(seed) % 8;
284*01826a49SYabin Cui         windowByte = (BYTE) ((exponent << 3) | mantissa);
285*01826a49SYabin Cui         fh.windowSize = (1U << (exponent + 10));
286*01826a49SYabin Cui         fh.windowSize += fh.windowSize / 8 * mantissa;
287*01826a49SYabin Cui     }
288*01826a49SYabin Cui 
289*01826a49SYabin Cui     {
290*01826a49SYabin Cui         /* Generate random content size */
291*01826a49SYabin Cui         size_t highBit;
292*01826a49SYabin Cui         if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
293*01826a49SYabin Cui             /* do content of at least 128 bytes */
294*01826a49SYabin Cui             highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
295*01826a49SYabin Cui         } else if (RAND(seed) & 3) {
296*01826a49SYabin Cui             /* do small content */
297*01826a49SYabin Cui             highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
298*01826a49SYabin Cui         } else {
299*01826a49SYabin Cui             /* 0 size frame */
300*01826a49SYabin Cui             highBit = 0;
301*01826a49SYabin Cui         }
302*01826a49SYabin Cui         fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
303*01826a49SYabin Cui 
304*01826a49SYabin Cui         /* provide size sometimes */
305*01826a49SYabin Cui         contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
306*01826a49SYabin Cui 
307*01826a49SYabin Cui         if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
308*01826a49SYabin Cui             /* do single segment sometimes */
309*01826a49SYabin Cui             fh.windowSize = (U32) fh.contentSize;
310*01826a49SYabin Cui             singleSegment = 1;
311*01826a49SYabin Cui         }
312*01826a49SYabin Cui     }
313*01826a49SYabin Cui 
314*01826a49SYabin Cui     if (contentSizeFlag) {
315*01826a49SYabin Cui         /* Determine how large fcs field has to be */
316*01826a49SYabin Cui         int minFcsCode = (fh.contentSize >= 256) +
317*01826a49SYabin Cui                                (fh.contentSize >= 65536 + 256) +
318*01826a49SYabin Cui                                (fh.contentSize > 0xFFFFFFFFU);
319*01826a49SYabin Cui         if (!singleSegment && !minFcsCode) {
320*01826a49SYabin Cui             minFcsCode = 1;
321*01826a49SYabin Cui         }
322*01826a49SYabin Cui         fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
323*01826a49SYabin Cui         if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
324*01826a49SYabin Cui     }
325*01826a49SYabin Cui 
326*01826a49SYabin Cui     /* write out the header */
327*01826a49SYabin Cui     MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
328*01826a49SYabin Cui     pos += 4;
329*01826a49SYabin Cui 
330*01826a49SYabin Cui     {
331*01826a49SYabin Cui         /*
332*01826a49SYabin Cui          * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
333*01826a49SYabin Cui          * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
334*01826a49SYabin Cui          * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
335*01826a49SYabin Cui          * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
336*01826a49SYabin Cui          * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
337*01826a49SYabin Cui          */
338*01826a49SYabin Cui         int const dictBits = info.useDict ? 3 : 0;
339*01826a49SYabin Cui         BYTE const frameHeaderDescriptor =
340*01826a49SYabin Cui                 (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
341*01826a49SYabin Cui         op[pos++] = frameHeaderDescriptor;
342*01826a49SYabin Cui     }
343*01826a49SYabin Cui 
344*01826a49SYabin Cui     if (!singleSegment) {
345*01826a49SYabin Cui         op[pos++] = windowByte;
346*01826a49SYabin Cui     }
347*01826a49SYabin Cui     if (info.useDict) {
348*01826a49SYabin Cui         MEM_writeLE32(op + pos, (U32) info.dictID);
349*01826a49SYabin Cui         pos += 4;
350*01826a49SYabin Cui     }
351*01826a49SYabin Cui     if (contentSizeFlag) {
352*01826a49SYabin Cui         switch (fcsCode) {
353*01826a49SYabin Cui         default: /* Impossible */
354*01826a49SYabin Cui         case 0: op[pos++] = (BYTE) fh.contentSize; break;
355*01826a49SYabin Cui         case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
356*01826a49SYabin Cui         case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
357*01826a49SYabin Cui         case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
358*01826a49SYabin Cui         }
359*01826a49SYabin Cui     }
360*01826a49SYabin Cui 
361*01826a49SYabin Cui     DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize);
362*01826a49SYabin Cui     DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
363*01826a49SYabin Cui     DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
364*01826a49SYabin Cui     DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);
365*01826a49SYabin Cui 
366*01826a49SYabin Cui     frame->data = op + pos;
367*01826a49SYabin Cui     frame->header = fh;
368*01826a49SYabin Cui }
369*01826a49SYabin Cui 
370*01826a49SYabin Cui /* Write a literal block in either raw or RLE form, return the literals size */
writeLiteralsBlockSimple(U32 * seed,frame_t * frame,size_t contentSize)371*01826a49SYabin Cui static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
372*01826a49SYabin Cui {
373*01826a49SYabin Cui     BYTE* op = (BYTE*)frame->data;
374*01826a49SYabin Cui     int const type = RAND(seed) % 2;
375*01826a49SYabin Cui     int const sizeFormatDesc = RAND(seed) % 8;
376*01826a49SYabin Cui     size_t litSize;
377*01826a49SYabin Cui     size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
378*01826a49SYabin Cui 
379*01826a49SYabin Cui     if (sizeFormatDesc == 0) {
380*01826a49SYabin Cui         /* Size_FormatDesc = ?0 */
381*01826a49SYabin Cui         maxLitSize = MIN(maxLitSize, 31);
382*01826a49SYabin Cui     } else if (sizeFormatDesc <= 4) {
383*01826a49SYabin Cui         /* Size_FormatDesc = 01 */
384*01826a49SYabin Cui         maxLitSize = MIN(maxLitSize, 4095);
385*01826a49SYabin Cui     } else {
386*01826a49SYabin Cui         /* Size_Format = 11 */
387*01826a49SYabin Cui         maxLitSize = MIN(maxLitSize, 1048575);
388*01826a49SYabin Cui     }
389*01826a49SYabin Cui 
390*01826a49SYabin Cui     litSize = RAND(seed) % (maxLitSize + 1);
391*01826a49SYabin Cui     if (frame->src == frame->srcStart && litSize == 0) {
392*01826a49SYabin Cui         litSize = 1; /* no empty literals if there's nothing preceding this block */
393*01826a49SYabin Cui     }
394*01826a49SYabin Cui     if (litSize + 3 > contentSize) {
395*01826a49SYabin Cui         litSize = contentSize; /* no matches shorter than 3 are allowed */
396*01826a49SYabin Cui     }
397*01826a49SYabin Cui     /* use smallest size format that fits */
398*01826a49SYabin Cui     if (litSize < 32) {
399*01826a49SYabin Cui         op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
400*01826a49SYabin Cui         op += 1;
401*01826a49SYabin Cui     } else if (litSize < 4096) {
402*01826a49SYabin Cui         op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
403*01826a49SYabin Cui         op[1] = (litSize >> 4) & 0xff;
404*01826a49SYabin Cui         op += 2;
405*01826a49SYabin Cui     } else {
406*01826a49SYabin Cui         op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
407*01826a49SYabin Cui         op[1] = (litSize >> 4) & 0xff;
408*01826a49SYabin Cui         op[2] = (litSize >> 12) & 0xff;
409*01826a49SYabin Cui         op += 3;
410*01826a49SYabin Cui     }
411*01826a49SYabin Cui 
412*01826a49SYabin Cui     if (type == 0) {
413*01826a49SYabin Cui         /* Raw literals */
414*01826a49SYabin Cui         DISPLAYLEVEL(4, "   raw literals\n");
415*01826a49SYabin Cui 
416*01826a49SYabin Cui         RAND_buffer(seed, LITERAL_BUFFER, litSize);
417*01826a49SYabin Cui         memcpy(op, LITERAL_BUFFER, litSize);
418*01826a49SYabin Cui         op += litSize;
419*01826a49SYabin Cui     } else {
420*01826a49SYabin Cui         /* RLE literals */
421*01826a49SYabin Cui         BYTE const symb = (BYTE) (RAND(seed) % 256);
422*01826a49SYabin Cui 
423*01826a49SYabin Cui         DISPLAYLEVEL(4, "   rle literals: 0x%02x\n", (unsigned)symb);
424*01826a49SYabin Cui 
425*01826a49SYabin Cui         memset(LITERAL_BUFFER, symb, litSize);
426*01826a49SYabin Cui         op[0] = symb;
427*01826a49SYabin Cui         op++;
428*01826a49SYabin Cui     }
429*01826a49SYabin Cui 
430*01826a49SYabin Cui     frame->data = op;
431*01826a49SYabin Cui 
432*01826a49SYabin Cui     return litSize;
433*01826a49SYabin Cui }
434*01826a49SYabin Cui 
435*01826a49SYabin Cui /* Generate a Huffman header for the given source */
writeHufHeader(U32 * seed,HUF_CElt * hufTable,void * dst,size_t dstSize,const void * src,size_t srcSize)436*01826a49SYabin Cui static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
437*01826a49SYabin Cui                                  const void* src, size_t srcSize)
438*01826a49SYabin Cui {
439*01826a49SYabin Cui     BYTE* const ostart = (BYTE*)dst;
440*01826a49SYabin Cui     BYTE* op = ostart;
441*01826a49SYabin Cui 
442*01826a49SYabin Cui     unsigned huffLog = 11;
443*01826a49SYabin Cui     unsigned maxSymbolValue = 255;
444*01826a49SYabin Cui 
445*01826a49SYabin Cui     unsigned count[HUF_SYMBOLVALUE_MAX+1];
446*01826a49SYabin Cui 
447*01826a49SYabin Cui     /* Scan input and build symbol stats */
448*01826a49SYabin Cui     {   size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP));
449*01826a49SYabin Cui         assert(!HIST_isError(largest));
450*01826a49SYabin Cui         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; }   /* single symbol, rle */
451*01826a49SYabin Cui         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
452*01826a49SYabin Cui     }
453*01826a49SYabin Cui 
454*01826a49SYabin Cui     /* Build Huffman Tree */
455*01826a49SYabin Cui     /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
456*01826a49SYabin Cui     huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
457*01826a49SYabin Cui     DISPLAYLEVEL(6, "     huffman log: %u\n", huffLog);
458*01826a49SYabin Cui     {   size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
459*01826a49SYabin Cui         CHECKERR(maxBits);
460*01826a49SYabin Cui         huffLog = (U32)maxBits;
461*01826a49SYabin Cui     }
462*01826a49SYabin Cui 
463*01826a49SYabin Cui     /* Write table description header */
464*01826a49SYabin Cui     {   size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
465*01826a49SYabin Cui         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
466*01826a49SYabin Cui         op += hSize;
467*01826a49SYabin Cui     }
468*01826a49SYabin Cui 
469*01826a49SYabin Cui     return op - ostart;
470*01826a49SYabin Cui }
471*01826a49SYabin Cui 
472*01826a49SYabin Cui /* Write a Huffman coded literals block and return the literals size */
writeLiteralsBlockCompressed(U32 * seed,frame_t * frame,size_t contentSize)473*01826a49SYabin Cui static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
474*01826a49SYabin Cui {
475*01826a49SYabin Cui     BYTE* origop = (BYTE*)frame->data;
476*01826a49SYabin Cui     BYTE* opend = (BYTE*)frame->dataEnd;
477*01826a49SYabin Cui     BYTE* op;
478*01826a49SYabin Cui     BYTE* const ostart = origop;
479*01826a49SYabin Cui     int const sizeFormat = RAND(seed) % 4;
480*01826a49SYabin Cui     size_t litSize;
481*01826a49SYabin Cui     size_t hufHeaderSize = 0;
482*01826a49SYabin Cui     size_t compressedSize = 0;
483*01826a49SYabin Cui     size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
484*01826a49SYabin Cui 
485*01826a49SYabin Cui     symbolEncodingType_e hType;
486*01826a49SYabin Cui 
487*01826a49SYabin Cui     if (contentSize < 64) {
488*01826a49SYabin Cui         /* make sure we get reasonably-sized literals for compression */
489*01826a49SYabin Cui         return ERROR(GENERIC);
490*01826a49SYabin Cui     }
491*01826a49SYabin Cui 
492*01826a49SYabin Cui     DISPLAYLEVEL(4, "   compressed literals\n");
493*01826a49SYabin Cui 
494*01826a49SYabin Cui     switch (sizeFormat) {
495*01826a49SYabin Cui     case 0: /* fall through, size is the same as case 1 */
496*01826a49SYabin Cui     case 1:
497*01826a49SYabin Cui         maxLitSize = MIN(maxLitSize, 1023);
498*01826a49SYabin Cui         origop += 3;
499*01826a49SYabin Cui         break;
500*01826a49SYabin Cui     case 2:
501*01826a49SYabin Cui         maxLitSize = MIN(maxLitSize, 16383);
502*01826a49SYabin Cui         origop += 4;
503*01826a49SYabin Cui         break;
504*01826a49SYabin Cui     case 3:
505*01826a49SYabin Cui         maxLitSize = MIN(maxLitSize, 262143);
506*01826a49SYabin Cui         origop += 5;
507*01826a49SYabin Cui         break;
508*01826a49SYabin Cui     default:; /* impossible */
509*01826a49SYabin Cui     }
510*01826a49SYabin Cui 
511*01826a49SYabin Cui     do {
512*01826a49SYabin Cui         op = origop;
513*01826a49SYabin Cui         do {
514*01826a49SYabin Cui             litSize = RAND(seed) % (maxLitSize + 1);
515*01826a49SYabin Cui         } while (litSize < 32); /* avoid small literal sizes */
516*01826a49SYabin Cui         if (litSize + 3 > contentSize) {
517*01826a49SYabin Cui             litSize = contentSize; /* no matches shorter than 3 are allowed */
518*01826a49SYabin Cui         }
519*01826a49SYabin Cui 
520*01826a49SYabin Cui         /* most of the time generate a new distribution */
521*01826a49SYabin Cui         if ((RAND(seed) & 3) || !frame->stats.hufInit) {
522*01826a49SYabin Cui             do {
523*01826a49SYabin Cui                 if (RAND(seed) & 3) {
524*01826a49SYabin Cui                     /* add 10 to ensure some compressibility */
525*01826a49SYabin Cui                     double const weight = ((RAND(seed) % 90) + 10) / 100.0;
526*01826a49SYabin Cui 
527*01826a49SYabin Cui                     DISPLAYLEVEL(5, "    distribution weight: %d%%\n",
528*01826a49SYabin Cui                                  (int)(weight * 100));
529*01826a49SYabin Cui 
530*01826a49SYabin Cui                     RAND_genDist(seed, frame->stats.hufDist, weight);
531*01826a49SYabin Cui                 } else {
532*01826a49SYabin Cui                     /* sometimes do restricted range literals to force
533*01826a49SYabin Cui                      * non-huffman headers */
534*01826a49SYabin Cui                     DISPLAYLEVEL(5, "    small range literals\n");
535*01826a49SYabin Cui                     RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
536*01826a49SYabin Cui                                        15);
537*01826a49SYabin Cui                 }
538*01826a49SYabin Cui                 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
539*01826a49SYabin Cui                                 litSize);
540*01826a49SYabin Cui 
541*01826a49SYabin Cui                 /* generate the header from the distribution instead of the
542*01826a49SYabin Cui                  * actual data to avoid bugs with symbols that were in the
543*01826a49SYabin Cui                  * distribution but never showed up in the output */
544*01826a49SYabin Cui                 hufHeaderSize = writeHufHeader(
545*01826a49SYabin Cui                         seed, frame->stats.hufTable, op, opend - op,
546*01826a49SYabin Cui                         frame->stats.hufDist, DISTSIZE);
547*01826a49SYabin Cui                 CHECKERR(hufHeaderSize);
548*01826a49SYabin Cui                 /* repeat until a valid header is written */
549*01826a49SYabin Cui             } while (hufHeaderSize == 0);
550*01826a49SYabin Cui             op += hufHeaderSize;
551*01826a49SYabin Cui             hType = set_compressed;
552*01826a49SYabin Cui 
553*01826a49SYabin Cui             frame->stats.hufInit = 1;
554*01826a49SYabin Cui         } else {
555*01826a49SYabin Cui             /* repeat the distribution/table from last time */
556*01826a49SYabin Cui             DISPLAYLEVEL(5, "    huffman repeat stats\n");
557*01826a49SYabin Cui             RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
558*01826a49SYabin Cui                             litSize);
559*01826a49SYabin Cui             hufHeaderSize = 0;
560*01826a49SYabin Cui             hType = set_repeat;
561*01826a49SYabin Cui         }
562*01826a49SYabin Cui 
563*01826a49SYabin Cui         do {
564*01826a49SYabin Cui             compressedSize =
565*01826a49SYabin Cui                     sizeFormat == 0
566*01826a49SYabin Cui                             ? HUF_compress1X_usingCTable(
567*01826a49SYabin Cui                                       op, opend - op, LITERAL_BUFFER, litSize,
568*01826a49SYabin Cui                                       frame->stats.hufTable, /* flags */ 0)
569*01826a49SYabin Cui                             : HUF_compress4X_usingCTable(
570*01826a49SYabin Cui                                       op, opend - op, LITERAL_BUFFER, litSize,
571*01826a49SYabin Cui                                       frame->stats.hufTable, /* flags */ 0);
572*01826a49SYabin Cui             CHECKERR(compressedSize);
573*01826a49SYabin Cui             /* this only occurs when it could not compress or similar */
574*01826a49SYabin Cui         } while (compressedSize <= 0);
575*01826a49SYabin Cui 
576*01826a49SYabin Cui         op += compressedSize;
577*01826a49SYabin Cui 
578*01826a49SYabin Cui         compressedSize += hufHeaderSize;
579*01826a49SYabin Cui         DISPLAYLEVEL(5, "    regenerated size: %u\n", (unsigned)litSize);
580*01826a49SYabin Cui         DISPLAYLEVEL(5, "    compressed size: %u\n", (unsigned)compressedSize);
581*01826a49SYabin Cui         if (compressedSize >= litSize) {
582*01826a49SYabin Cui             DISPLAYLEVEL(5, "     trying again\n");
583*01826a49SYabin Cui             /* if we have to try again, reset the stats so we don't accidentally
584*01826a49SYabin Cui              * try to repeat a distribution we just made */
585*01826a49SYabin Cui             frame->stats = frame->oldStats;
586*01826a49SYabin Cui         } else {
587*01826a49SYabin Cui             break;
588*01826a49SYabin Cui         }
589*01826a49SYabin Cui     } while (1);
590*01826a49SYabin Cui 
591*01826a49SYabin Cui     /* write header */
592*01826a49SYabin Cui     switch (sizeFormat) {
593*01826a49SYabin Cui     case 0: /* fall through, size is the same as case 1 */
594*01826a49SYabin Cui     case 1: {
595*01826a49SYabin Cui         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
596*01826a49SYabin Cui                            ((U32)compressedSize << 14);
597*01826a49SYabin Cui         MEM_writeLE24(ostart, header);
598*01826a49SYabin Cui         break;
599*01826a49SYabin Cui     }
600*01826a49SYabin Cui     case 2: {
601*01826a49SYabin Cui         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
602*01826a49SYabin Cui                            ((U32)compressedSize << 18);
603*01826a49SYabin Cui         MEM_writeLE32(ostart, header);
604*01826a49SYabin Cui         break;
605*01826a49SYabin Cui     }
606*01826a49SYabin Cui     case 3: {
607*01826a49SYabin Cui         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
608*01826a49SYabin Cui                            ((U32)compressedSize << 22);
609*01826a49SYabin Cui         MEM_writeLE32(ostart, header);
610*01826a49SYabin Cui         ostart[4] = (BYTE)(compressedSize >> 10);
611*01826a49SYabin Cui         break;
612*01826a49SYabin Cui     }
613*01826a49SYabin Cui     default:; /* impossible */
614*01826a49SYabin Cui     }
615*01826a49SYabin Cui 
616*01826a49SYabin Cui     frame->data = op;
617*01826a49SYabin Cui     return litSize;
618*01826a49SYabin Cui }
619*01826a49SYabin Cui 
/* writeLiteralsBlock() :
 * Picks one of the two literals writers for the current block and forwards
 * the call to it.
 * The compressed writer is selected only when the random draw has at least one
 * of its three low bits set AND contentSize is at least 64; every other case
 * goes to the simple writer.
 * @return : whatever the selected writer returns (the caller uses it as the
 *           literals size of the block) */
static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
{
    /* The draw is made before contentSize is examined, so the generator
     * advances exactly once per call whichever writer ends up being used. */
    U32 const draw = RAND(seed) & 7;

    /* only do compressed for larger segments to avoid compressibility issues */
    if (draw == 0 || contentSize < 64) {
        return writeLiteralsBlockSimple(seed, frame, contentSize);
    }
    return writeLiteralsBlockCompressed(seed, frame, contentSize);
}
629*01826a49SYabin Cui 
/* initSeqStore() :
 * Attaches the file-level SEQUENCE_* buffers to `seqStore`, fills in its
 * capacity limits, then resets it with ZSTD_resetSeqStore(). */
static inline void initSeqStore(seqStore_t *seqStore) {
    /* storage for the sequences, their literals and the three code tables */
    seqStore->sequencesStart = SEQUENCE_BUFFER;
    seqStore->litStart       = SEQUENCE_LITERAL_BUFFER;
    seqStore->ofCode         = SEQUENCE_OFCODE;
    seqStore->mlCode         = SEQUENCE_MLCODE;
    seqStore->llCode         = SEQUENCE_LLCODE;

    /* capacity limits */
    seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX;
    seqStore->maxNbSeq = MAX_NB_SEQ;

    /* the reset runs last, once every field above has been assigned */
    ZSTD_resetSeqStore(seqStore);
}
641*01826a49SYabin Cui 
642*01826a49SYabin Cui /* Randomly generate sequence commands */
                       /* generateSequences() :
                        * Chooses the sequences of one block and regenerates the content they
                        * describe, writing it from frame->src onwards through a local cursor
                        * (frame->src itself is not modified by this function).
                        *
                        * When literalsSize != contentSize, between 1 and maxSequences sequences
                        * are produced; when they are equal, none are.
                        * Each sequence :
                        *  - copies literalLen bytes from LITERAL_BUFFER,
                        *  - picks an offset, either a fresh one or one taken from the repeat
                        *    history in frame->stats.rep,
                        *  - replicates matchLen bytes from `offset` bytes back, reading from the
                        *    end of info.dictContent while the offset reaches before
                        *    frame->srcStart,
                        *  - updates frame->stats.rep and records itself with ZSTD_storeSeq().
                        * Literals not consumed by any sequence are appended after the last match.
                        *
                        * @return : number of sequences generated (0 when the block is literals only) */
643*01826a49SYabin Cui static U32
generateSequences(U32 * seed,frame_t * frame,seqStore_t * seqStore,size_t contentSize,size_t literalsSize,dictInfo info)644*01826a49SYabin Cui generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
645*01826a49SYabin Cui                   size_t contentSize, size_t literalsSize, dictInfo info)
646*01826a49SYabin Cui {
647*01826a49SYabin Cui     /* The total length of all the matches */
648*01826a49SYabin Cui     size_t const remainingMatch = contentSize - literalsSize;
649*01826a49SYabin Cui     size_t excessMatch = 0;
650*01826a49SYabin Cui     U32 numSequences = 0;
651*01826a49SYabin Cui     U32 i;
652*01826a49SYabin Cui 
653*01826a49SYabin Cui     const BYTE* literals = LITERAL_BUFFER;
654*01826a49SYabin Cui     BYTE* srcPtr = frame->src;
655*01826a49SYabin Cui 
656*01826a49SYabin Cui     if (literalsSize != contentSize) {
657*01826a49SYabin Cui         /* each match must be at least MIN_SEQ_LEN, so this is the maximum
658*01826a49SYabin Cui          * number of sequences we can have */
                               /* NOTE(review): if remainingMatch were smaller than MIN_SEQ_LEN,
                                * maxSequences would be 0 and the modulo below would divide by zero;
                                * presumably the literals writers never leave such a remainder -
                                * confirm against writeLiteralsBlockSimple/Compressed. */
659*01826a49SYabin Cui         U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
660*01826a49SYabin Cui         numSequences = (RAND(seed) % maxSequences) + 1;
661*01826a49SYabin Cui 
662*01826a49SYabin Cui         /* the extra match lengths we have to allocate to each sequence */
663*01826a49SYabin Cui         excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
664*01826a49SYabin Cui     }
665*01826a49SYabin Cui 
666*01826a49SYabin Cui     DISPLAYLEVEL(5, "    total match lengths: %u\n", (unsigned)remainingMatch);
667*01826a49SYabin Cui     for (i = 0; i < numSequences; i++) {
668*01826a49SYabin Cui         /* Generate match and literal lengths by exponential distribution to
669*01826a49SYabin Cui          * ensure nice numbers */
670*01826a49SYabin Cui         U32 matchLen =
671*01826a49SYabin Cui                 MIN_SEQ_LEN +
672*01826a49SYabin Cui                 ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i)));
673*01826a49SYabin Cui         U32 literalLen =
674*01826a49SYabin Cui                 (RAND(seed) & 7)
675*01826a49SYabin Cui                         ? ROUND(RAND_exp(seed,
676*01826a49SYabin Cui                                          (double)literalsSize /
677*01826a49SYabin Cui                                                  (double)(numSequences - i)))
678*01826a49SYabin Cui                         : 0;
679*01826a49SYabin Cui         /* actual offset, code to send, and point to copy up to when shifting
680*01826a49SYabin Cui          * codes in the repeat offsets history */
681*01826a49SYabin Cui         U32 offset, offBase, repIndex;
682*01826a49SYabin Cui 
683*01826a49SYabin Cui         /* bounds checks */
684*01826a49SYabin Cui         matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
685*01826a49SYabin Cui         literalLen = MIN(literalLen, (U32) literalsSize);
                               /* the first sequence written at the very start of the source gets at
                                * least one literal - presumably so that its match has a byte to
                                * copy from */
686*01826a49SYabin Cui         if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
                               /* the last sequence absorbs all the excess that is left, so that the
                                * match lengths add up to exactly remainingMatch */
687*01826a49SYabin Cui         if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
688*01826a49SYabin Cui 
689*01826a49SYabin Cui         memcpy(srcPtr, literals, literalLen);
690*01826a49SYabin Cui         srcPtr += literalLen;
691*01826a49SYabin Cui         do {
692*01826a49SYabin Cui             if (RAND(seed) & 7) {
693*01826a49SYabin Cui                 /* do a normal offset */
694*01826a49SYabin Cui                 U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
695*01826a49SYabin Cui                 offset = (RAND(seed) %
696*01826a49SYabin Cui                           MIN(frame->header.windowSize,
697*01826a49SYabin Cui                               (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
698*01826a49SYabin Cui                          1;
699*01826a49SYabin Cui                 if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
700*01826a49SYabin Cui                     /* need to occasionally generate offsets that go past the start */
701*01826a49SYabin Cui                     /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
702*01826a49SYabin Cui                     U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
703*01826a49SYabin Cui                     offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
704*01826a49SYabin Cui                     if (offset > frame->header.windowSize) {
705*01826a49SYabin Cui                         if (lenPastStart < MIN_SEQ_LEN) {
706*01826a49SYabin Cui                             /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
707*01826a49SYabin Cui                             /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
708*01826a49SYabin Cui                             /* make sure lenPastStart does not go past dictionary start though */
709*01826a49SYabin Cui                             lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
710*01826a49SYabin Cui                             offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
711*01826a49SYabin Cui                         }
712*01826a49SYabin Cui                         {   U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
713*01826a49SYabin Cui                             matchLen = MIN(matchLen, matchLenBound);
714*01826a49SYabin Cui                         }
715*01826a49SYabin Cui                     }
716*01826a49SYabin Cui                 }
717*01826a49SYabin Cui                 offBase = OFFSET_TO_OFFBASE(offset);
                                       /* a fresh offset shifts the whole repeat history down */
718*01826a49SYabin Cui                 repIndex = 2;
719*01826a49SYabin Cui             } else {
720*01826a49SYabin Cui                 /* do a repeat offset */
721*01826a49SYabin Cui                 U32 const randomRepIndex = RAND(seed) % 3;
722*01826a49SYabin Cui                 offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1);  /* expects values between 1 & 3 */
723*01826a49SYabin Cui                 if (literalLen > 0) {
724*01826a49SYabin Cui                     offset = frame->stats.rep[randomRepIndex];
725*01826a49SYabin Cui                     repIndex = randomRepIndex;
726*01826a49SYabin Cui                 } else {
727*01826a49SYabin Cui                     /* special case : literalLen == 0 */
                                           /* the three repeat codes then select rep[1], rep[2] and
                                            * rep[0] - 1 respectively */
728*01826a49SYabin Cui                     offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1
729*01826a49SYabin Cui                                            : frame->stats.rep[randomRepIndex + 1];
730*01826a49SYabin Cui                     repIndex = MIN(2, randomRepIndex + 1);
731*01826a49SYabin Cui                 }
732*01826a49SYabin Cui             }
                               /* draw again when the offset is 0, or when there is no dictionary and
                                * the offset reaches before the start of the generated data */
733*01826a49SYabin Cui         } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
734*01826a49SYabin Cui 
                               /* replicate the match one byte at a time, so that a match overlapping
                                * its own output reads bytes written earlier in this same loop */
735*01826a49SYabin Cui         {   BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize);
736*01826a49SYabin Cui             size_t j;
737*01826a49SYabin Cui             for (j = 0; j < matchLen; j++) {
738*01826a49SYabin Cui                 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
739*01826a49SYabin Cui                     /* copy from dictionary instead of literals */
740*01826a49SYabin Cui                     size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
741*01826a49SYabin Cui                     *srcPtr = *(dictEnd - dictOffset);
742*01826a49SYabin Cui                 }
743*01826a49SYabin Cui                 else {
744*01826a49SYabin Cui                     *srcPtr = *(srcPtr-offset);
745*01826a49SYabin Cui                 }
746*01826a49SYabin Cui                 srcPtr++;
747*01826a49SYabin Cui         }   }
748*01826a49SYabin Cui 
                               /* update the repeat history : entries below repIndex move down by one
                                * slot and the offset just used becomes rep[0] */
749*01826a49SYabin Cui         {   int r;
750*01826a49SYabin Cui             for (r = repIndex; r > 0; r--) {
751*01826a49SYabin Cui                 frame->stats.rep[r] = frame->stats.rep[r - 1];
752*01826a49SYabin Cui             }
753*01826a49SYabin Cui             frame->stats.rep[0] = offset;
754*01826a49SYabin Cui         }
755*01826a49SYabin Cui 
756*01826a49SYabin Cui         DISPLAYLEVEL(6, "      LL: %5u OF: %5u ML: %5u",
757*01826a49SYabin Cui                     (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen);
758*01826a49SYabin Cui         DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
759*01826a49SYabin Cui                      (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
760*01826a49SYabin Cui         DISPLAYLEVEL(6, "\n");
761*01826a49SYabin Cui         if (OFFBASE_IS_REPCODE(offBase)) {  /* expects sumtype numeric representation of ZSTD_storeSeq() */
762*01826a49SYabin Cui             DISPLAYLEVEL(7, "        repeat offset: %d\n", (int)repIndex);
763*01826a49SYabin Cui         }
764*01826a49SYabin Cui         /* use libzstd sequence handling */
765*01826a49SYabin Cui         ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
766*01826a49SYabin Cui                       offBase, matchLen);
767*01826a49SYabin Cui 
                               /* account for what this sequence consumed */
768*01826a49SYabin Cui         literalsSize -= literalLen;
769*01826a49SYabin Cui         excessMatch -= (matchLen - MIN_SEQ_LEN);
770*01826a49SYabin Cui         literals += literalLen;
771*01826a49SYabin Cui     }
772*01826a49SYabin Cui 
                           /* literals that no sequence consumed trail the last match */
773*01826a49SYabin Cui     memcpy(srcPtr, literals, literalsSize);
774*01826a49SYabin Cui     srcPtr += literalsSize;
775*01826a49SYabin Cui     DISPLAYLEVEL(6, "      excess literals: %5u ", (unsigned)literalsSize);
776*01826a49SYabin Cui     DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
777*01826a49SYabin Cui     DISPLAYLEVEL(6, "\n");
778*01826a49SYabin Cui 
779*01826a49SYabin Cui     return numSequences;
780*01826a49SYabin Cui }
781*01826a49SYabin Cui 
/* initSymbolSet() :
 * Rebuilds a symbol-presence table from a list of codes.
 *   symbols        : codes to record
 *   len            : number of entries in `symbols`
 *   set            : output table; entries [0, maxSymbolValue] are cleared,
 *                    then set[s] becomes 1 for every recorded code s
 *   maxSymbolValue : highest index of `set` this function is allowed to touch
 * Codes above maxSymbolValue are not recorded : only maxSymbolValue+1 bytes of
 * `set` are cleared here, so writing at a larger index would land outside the
 * range this function owns. isSymbolSubset() rejects such codes anyway, so
 * leaving them out does not change its answer. */
static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
{
    size_t i;

    memset(set, 0, (size_t)maxSymbolValue+1);

    for (i = 0; i < len; i++) {
        BYTE const symbol = symbols[i];
        if (symbol <= maxSymbolValue) {
            set[symbol] = 1;
        }
    }
}
792*01826a49SYabin Cui 
/* isSymbolSubset() :
 * Tells whether every code of `symbols` is marked in the presence table `set`.
 *   symbols        : codes to check
 *   len            : number of entries in `symbols`
 *   set            : presence table, indexed by code value
 *   maxSymbolValue : highest code that may be looked up in `set`
 * @return : 1 when all `len` codes are <= maxSymbolValue and marked in `set`
 *           (this includes len == 0), 0 as soon as one code fails */
static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
{
    size_t left;

    for (left = len; left != 0; left--) {
        BYTE const symbol = *symbols++;
        /* the range test comes first, so `set` is never indexed past maxSymbolValue */
        int const present = (symbol <= maxSymbolValue) && set[symbol];
        if (!present) return 0;
    }
    return 1;
}
804*01826a49SYabin Cui 
/* writeSequences() :
 * Serializes the sequences held in `seqStorePtr` at frame->data.
 *
 * Output layout, in order :
 *  - the sequence count, on 1 to 3 bytes,
 *  - one flag byte (seqHead) holding the encoding type chosen for literal
 *    lengths, offsets and match lengths,
 *  - for each of the three, depending on its type : nothing (repeat / default
 *    distribution), the single code (RLE) or a normalized-count table,
 *  - the sequence bitstream.
 *
 * For each of the three code streams the type is chosen in this order :
 *  - repeat, one time out of four, when a previous call already set
 *    frame->stats.fseInit and every code belongs to the symbol set recorded by
 *    that call,
 *  - RLE when all codes are identical,
 *  - the default distribution, one time out of four,
 *  - otherwise a table built from the actual counts.
 *
 * On success frame->data points past the bytes written, frame->stats.fseInit
 * is set and the three symbol sets are rebuilt from this block's codes.
 * When nbSeq == 0 only the count byte is written and nothing else is touched.
 *
 * Every FSE_buildCTable_wksp() call goes through CHECKERR, the same way for
 * the three code streams.
 *
 * @return : 0 on success, or a zstd error code (dstSize_tooSmall, GENERIC) */
static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
                             size_t nbSeq)
{
    /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
    unsigned count[MaxSeq+1];
    S16 norm[MaxSeq+1];
    FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
    FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const oend = (BYTE*)frame->dataEnd;
    BYTE* op = (BYTE*)frame->data;
    BYTE* seqHead;
    BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)];

    /* literals compressing block removed so that can be done separately */

    /* Sequences Header */
    if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
    if (nbSeq < 128) *op++ = (BYTE)nbSeq;
    else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;

    if (nbSeq==0) {
        frame->data = op;
        return 0;
    }

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;

    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);

    /* CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
        assert(!HIST_isError(mostFrequent));
        if (frame->stats.fseInit && !(RAND(seed) & 3) &&
                   isSymbolSubset(llCodeTable, nbSeq,
                                  frame->stats.litlengthSymbolSet, 35)) {
            /* maybe do repeat mode if we're allowed to */
            LLtype = set_repeat;
        } else if (mostFrequent == nbSeq) {
            /* do RLE if we have the chance */
            *op++ = llCodeTable[0];
            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
            LLtype = set_rle;
        } else if (!(RAND(seed) & 3)) {
            /* maybe use the default distribution */
            CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
            LLtype = set_basic;
        } else {
            /* fall back on a full table */
            size_t nbSeq_1 = nbSeq;
            const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
            /* the last code is only used to initialize the FSE state, so it is
             * left out of the statistics when it is not the sole occurrence */
            if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
              op += NCountSize; }
            CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
            LLtype = set_compressed;
    }   }

    /* CTable for Offsets */
    /* see Literal Lengths for descriptions of mode choices */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
        assert(!HIST_isError(mostFrequent));
        if (frame->stats.fseInit && !(RAND(seed) & 3) &&
                   isSymbolSubset(ofCodeTable, nbSeq,
                                  frame->stats.offsetSymbolSet, 28)) {
            Offtype = set_repeat;
        } else if (mostFrequent == nbSeq) {
            *op++ = ofCodeTable[0];
            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
            Offtype = set_rle;
        } else if (!(RAND(seed) & 3)) {
            CHECKERR(FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
            Offtype = set_basic;
        } else {
            size_t nbSeq_1 = nbSeq;
            const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
            if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
              op += NCountSize; }
            CHECKERR(FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
            Offtype = set_compressed;
    }   }

    /* CTable for MatchLengths */
    /* see Literal Lengths for descriptions of mode choices */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
        assert(!HIST_isError(mostFrequent));
        if (frame->stats.fseInit && !(RAND(seed) & 3) &&
                   isSymbolSubset(mlCodeTable, nbSeq,
                                  frame->stats.matchlengthSymbolSet, 52)) {
            MLtype = set_repeat;
        } else if (mostFrequent == nbSeq) {
            *op++ = *mlCodeTable;
            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
            MLtype = set_rle;
        } else if (!(RAND(seed) & 3)) {
            /* sometimes do default distribution */
            CHECKERR(FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
            MLtype = set_basic;
        } else {
            /* fall back on table */
            size_t nbSeq_1 = nbSeq;
            const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
            if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
              op += NCountSize; }
            CHECKERR(FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
            MLtype = set_compressed;
    }   }

    /* remember which codes this block used : a later block may only pick
     * repeat mode when all of its codes belong to these sets */
    frame->stats.fseInit = 1;
    initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
    initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
    initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);

    /* the arguments are cast to unsigned, hence %u */
    DISPLAYLEVEL(5, "    LL type: %u OF type: %u ML type: %u\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype);

    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

    /* Encoding Sequences */
    {   BIT_CStream_t blockStream;
        FSE_CState_t  stateMatchLength;
        FSE_CState_t  stateOffsetBits;
        FSE_CState_t  stateLitLength;

        RETURN_ERROR_IF(
            ERR_isError(BIT_initCStream(&blockStream, op, oend-op)),
            dstSize_tooSmall, "not enough space remaining");

        /* first symbols */
        /* encoding starts from the last sequence and walks towards the first */
        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
        BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
        if (MEM_32bits()) BIT_flushBits(&blockStream);
        BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
        if (MEM_32bits()) BIT_flushBits(&blockStream);
        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
        BIT_flushBits(&blockStream);

        {   size_t n;
            for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
                BYTE const llCode = llCodeTable[n];
                BYTE const ofCode = ofCodeTable[n];
                BYTE const mlCode = mlCodeTable[n];
                U32  const llBits = LL_bits[llCode];
                U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
                U32  const mlBits = ML_bits[mlCode];
                                                                                /* (7)*/  /* (7)*/
                FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
                FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
                FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
                if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
                    BIT_flushBits(&blockStream);                                /* (7)*/
                BIT_addBits(&blockStream, sequences[n].litLength, llBits);
                if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
                BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);         /* 31 */
                BIT_flushBits(&blockStream);                                    /* (7)*/
        }   }

        FSE_flushCState(&blockStream, &stateMatchLength);
        FSE_flushCState(&blockStream, &stateOffsetBits);
        FSE_flushCState(&blockStream, &stateLitLength);

        {   size_t const streamSize = BIT_closeCStream(&blockStream);
            if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
            op += streamSize;
    }   }

    frame->data = op;

    return 0;
}
997*01826a49SYabin Cui 
/* writeSequencesBlock() :
 * Produces the sequences section of a compressed block in two steps, using a
 * freshly initialized local sequence store : generateSequences() picks the
 * sequences, then writeSequences() serializes them into the frame.
 * The result of writeSequences() is passed to CHECKERR.
 * @return : number of sequences generated */
static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
                                  size_t literalsSize, dictInfo info)
{
    seqStore_t store;
    size_t nbSeq;

    initSeqStore(&store);

    /* step 1 : choose the sequences */
    nbSeq = generateSequences(seed, frame, &store, contentSize, literalsSize, info);

    /* step 2 : encode them into the frame data */
    CHECKERR(writeSequences(seed, frame, &store, nbSeq));

    return nbSeq;
}
1014*01826a49SYabin Cui 
/* Writes the body of one compressed block : a literals section followed by
 * a sequences section, both starting from the current frame->data position.
 * @return : distance, in bytes, between frame->data after the call and
 *           frame->data before the call. */
static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
{
    BYTE* const bodyStart = (BYTE*)frame->data;

    DISPLAYLEVEL(4, "  compressed block:\n");

    {   size_t const litSize = writeLiteralsBlock(seed, frame, contentSize);

        DISPLAYLEVEL(4, "   literals size: %u\n", (unsigned)litSize);

        {   size_t const seqCount = writeSequencesBlock(seed, frame, contentSize, litSize, info);

            DISPLAYLEVEL(4, "   number of sequences: %u\n", (unsigned)seqCount);
    }   }

    return (BYTE*)frame->data - bodyStart;
}
1033*01826a49SYabin Cui 
/* Writes one block (3-byte header + body) at frame->data and produces the
 * matching `contentSize` bytes of original content at frame->src.
 *
 * The block kind is drawn at random (1 chance in 8 each for raw and RLE,
 * compressed otherwise) :
 *  - raw        : random bytes, stored as-is;
 *  - RLE        : a single random byte value, only when the frame content is non-empty;
 *  - compressed : falls back to a raw block when the compressed body
 *                 is not strictly smaller than the content.
 *
 * Header layout, as written below : bit 0 = lastBlock, bits 1-2 = block type,
 * remaining bits = size field (little-endian).
 * On return, frame->data points past the block and frame->src past its content. */
static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
                       int lastBlock, dictInfo info)
{
    int const typeSelector = RAND(seed) % 8;
    BYTE* const blockHeader = (BYTE*)frame->data;
    BYTE* body = blockHeader + 3;   /* block body starts right after the header */
    size_t sizeField;
    int blockType;

    DISPLAYLEVEL(4, " block:\n");
    DISPLAYLEVEL(4, "  block content size: %u\n", (unsigned)contentSize);
    DISPLAYLEVEL(4, "  last block: %s\n", lastBlock ? "yes" : "no");

    if (typeSelector == 0) {
        /* raw block : content is random noise, copied verbatim */
        RAND_buffer(seed, frame->src, contentSize);
        memcpy(body, frame->src, contentSize);
        body += contentSize;

        blockType = 0;
        sizeField = contentSize;
    } else if (typeSelector == 1 && frame->header.contentSize > 0) {
        /* RLE block. Skipped when frame content is 0,
         * since a block size of 1 may exceed the max block size. */
        BYTE const rleByte = RAND(seed) & 0xff;

        *body++ = rleByte;
        memset(frame->src, rleByte, contentSize);

        blockType = 1;
        sizeField = contentSize;   /* size field carries the regenerated size */
    } else {
        /* compressed block : the most common case */
        size_t cSize;

        frame->oldStats = frame->stats;   /* snapshot, restored if the attempt is discarded */
        frame->data = body;
        cSize = writeCompressedBlock(seed, frame, contentSize, info);

        if (cSize < contentSize) {
            blockType = 2;
            body += cSize;
            sizeField = cSize;
        } else {
            /* a compressed block must be strictly smaller than its content :
             * data doesn't compress, so store it as a raw block instead */
            memcpy(body, frame->src, contentSize);
            body += contentSize;

            blockType = 0;
            sizeField = contentSize;

            frame->stats = frame->oldStats;   /* don't update the stats */
        }
    }

    frame->src = (BYTE*)frame->src + contentSize;

    DISPLAYLEVEL(4, "  block type: %s\n", BLOCK_TYPES[blockType]);
    DISPLAYLEVEL(4, "  block size field: %u\n", (unsigned)sizeField);

    blockHeader[0] = (BYTE) ((lastBlock | (blockType << 1) | (sizeField << 3)) & 0xff);
    MEM_writeLE16(blockHeader + 1, (U16) (sizeField >> 5));

    frame->data = body;
}
1100*01826a49SYabin Cui 
/* Splits the frame content (frame->header.contentSize bytes) into a random
 * succession of blocks and writes each of them with writeBlock().
 * A block never holds more than min(g_maxBlockSize, windowSize) bytes.
 * The loop stops once a block flagged as last has been written;
 * that block receives all the content still remaining. */
static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
{
    size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
    size_t remaining = frame->header.contentSize;
    int lastBlock;

    do {
        size_t blockContentSize = 0;   /* default : an empty block */

        /* Ending is only possible when the remainder fits in one block;
         * in that case, 1 in 4 chance of ending the frame */
        lastBlock = (remaining > maxBlockSize) ? 0 : !(RAND(seed) & 3);

        if (lastBlock) {
            blockContentSize = remaining;
        } else if (remaining > 0 && (RAND(seed) & 7)) {
            /* some variable size block */
            size_t const cap = MIN(maxBlockSize, remaining);
            blockContentSize = RAND(seed) % (cap + 1);
        } else if (remaining > maxBlockSize && (RAND(seed) & 1)) {
            /* some full size block */
            blockContentSize = maxBlockSize;
        }

        writeBlock(seed, frame, blockContentSize, lastBlock, info);

        remaining -= blockContentSize;
    } while (!lastBlock);
}
1130*01826a49SYabin Cui 
/* Appends a 4-byte checksum at frame->data, so that implementations can
 * verify their output : the low 32 bits of XXH64 (seed 0) computed over
 * the content generated so far, i.e. [frame->srcStart, frame->src).
 * frame->data is advanced by 4. */
static void writeChecksum(frame_t* frame)
{
    size_t const contentLen = (BYTE*)frame->src - (BYTE*)frame->srcStart;
    U64 const hash64 = XXH64(frame->srcStart, contentLen, 0);
    BYTE* const dst = (BYTE*)frame->data;

    DISPLAYLEVEL(3, "  checksum: %08x\n", (unsigned)hash64);

    MEM_writeLE32(dst, (U32)hash64);
    frame->data = dst + 4;
}
1139*01826a49SYabin Cui 
/* Writes `size` bytes starting at `buf` to the file at `path`
 * (created/truncated, binary mode), or to stdout when `path` is NULL.
 * Any failure to open, write or close the destination prints a diagnostic
 * on stderr and terminates the program with exit(1). */
static void outputBuffer(const void* buf, size_t size, const char* const path)
{
    const unsigned char* const bytes = (const unsigned char*)buf;
    FILE* const out = path ? fopen(path, "wb") : stdout;
    size_t written = 0;

    if (!out) {
        fprintf(stderr, "Failed to open file at %s: ", path);
        perror(NULL);
        exit(1);
    }

    /* fwrite() may perform a short write : loop until everything is out */
    while (written < size) {
        written += fwrite(bytes + written, 1, size - written, out);
        if (ferror(out)) {
            fprintf(stderr, "Failed to write to file at %s: ", path);
            perror(NULL);
            exit(1);
        }
    }

    /* stdout is not ours to close.
     * For a real file, buffered data is flushed at close time, so a failure
     * reported by fclose() means the output may be incomplete. */
    if (path && fclose(out) != 0) {
        fprintf(stderr, "Failed to close file at %s: ", path);
        perror(NULL);
        exit(1);
    }
}
1172*01826a49SYabin Cui 
/* Resets `fr` to a blank state : all fields zeroed, the data cursor placed
 * at the beginning of FRAME_BUFFER, the content cursor at the beginning of
 * CONTENT_BUFFER, and the repeat codes set to their initial values. */
static void initFrame(frame_t* fr)
{
    memset(fr, 0, sizeof(*fr));

    /* compressed data goes into FRAME_BUFFER */
    fr->dataStart = FRAME_BUFFER;
    fr->data      = fr->dataStart;
    fr->dataEnd   = FRAME_BUFFER + sizeof(FRAME_BUFFER);

    /* original content goes into CONTENT_BUFFER */
    fr->srcStart = CONTENT_BUFFER;
    fr->src      = fr->srcStart;
    fr->srcEnd   = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);

    /* init repeat codes */
    fr->stats.rep[0] = 1;
    fr->stats.rep[1] = 4;
    fr->stats.rep[2] = 8;
}
1186*01826a49SYabin Cui 
1187*01826a49SYabin Cui /**
1188*01826a49SYabin Cui  * Generated a single zstd compressed block with no block/frame header.
1189*01826a49SYabin Cui  * Returns the final seed.
1190*01826a49SYabin Cui  */
generateCompressedBlock(U32 seed,frame_t * frame,dictInfo info)1191*01826a49SYabin Cui static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
1192*01826a49SYabin Cui {
1193*01826a49SYabin Cui     size_t blockContentSize;
1194*01826a49SYabin Cui     int blockWritten = 0;
1195*01826a49SYabin Cui     BYTE* op;
1196*01826a49SYabin Cui     DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed);
1197*01826a49SYabin Cui     initFrame(frame);
1198*01826a49SYabin Cui     op = (BYTE*)frame->data;
1199*01826a49SYabin Cui 
1200*01826a49SYabin Cui     while (!blockWritten) {
1201*01826a49SYabin Cui         size_t cSize;
1202*01826a49SYabin Cui         /* generate window size */
1203*01826a49SYabin Cui         {   int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
1204*01826a49SYabin Cui             int const mantissa = RAND(&seed) % 8;
1205*01826a49SYabin Cui             frame->header.windowSize = (1U << (exponent + 10));
1206*01826a49SYabin Cui             frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
1207*01826a49SYabin Cui         }
1208*01826a49SYabin Cui 
1209*01826a49SYabin Cui         /* generate content size */
1210*01826a49SYabin Cui         {   size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1211*01826a49SYabin Cui             if (RAND(&seed) & 15) {
1212*01826a49SYabin Cui                 /* some full size blocks */
1213*01826a49SYabin Cui                 blockContentSize = maxBlockSize;
1214*01826a49SYabin Cui             } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
1215*01826a49SYabin Cui                 /* some small blocks <= 128 bytes*/
1216*01826a49SYabin Cui                 blockContentSize = RAND(&seed) % (1U << 7);
1217*01826a49SYabin Cui             } else {
1218*01826a49SYabin Cui                 /* some variable size blocks */
1219*01826a49SYabin Cui                 blockContentSize = RAND(&seed) % maxBlockSize;
1220*01826a49SYabin Cui             }
1221*01826a49SYabin Cui         }
1222*01826a49SYabin Cui 
1223*01826a49SYabin Cui         /* try generating a compressed block */
1224*01826a49SYabin Cui         frame->oldStats = frame->stats;
1225*01826a49SYabin Cui         frame->data = op;
1226*01826a49SYabin Cui         cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
1227*01826a49SYabin Cui         if (cSize >= blockContentSize) {  /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
1228*01826a49SYabin Cui             /* data doesn't compress -- try again */
1229*01826a49SYabin Cui             frame->stats = frame->oldStats; /* don't update the stats */
1230*01826a49SYabin Cui             DISPLAYLEVEL(5, "   can't compress block : try again \n");
1231*01826a49SYabin Cui         } else {
1232*01826a49SYabin Cui             blockWritten = 1;
1233*01826a49SYabin Cui             DISPLAYLEVEL(4, "   block size: %u \n", (unsigned)cSize);
1234*01826a49SYabin Cui             frame->src = (BYTE*)frame->src + blockContentSize;
1235*01826a49SYabin Cui         }
1236*01826a49SYabin Cui     }
1237*01826a49SYabin Cui     return seed;
1238*01826a49SYabin Cui }
1239*01826a49SYabin Cui 
1240*01826a49SYabin Cui /* Return the final seed */
generateFrame(U32 seed,frame_t * fr,dictInfo info)1241*01826a49SYabin Cui static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
1242*01826a49SYabin Cui {
1243*01826a49SYabin Cui     /* generate a complete frame */
1244*01826a49SYabin Cui     DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed);
1245*01826a49SYabin Cui     initFrame(fr);
1246*01826a49SYabin Cui 
1247*01826a49SYabin Cui     writeFrameHeader(&seed, fr, info);
1248*01826a49SYabin Cui     writeBlocks(&seed, fr, info);
1249*01826a49SYabin Cui     writeChecksum(fr);
1250*01826a49SYabin Cui 
1251*01826a49SYabin Cui     return seed;
1252*01826a49SYabin Cui }
1253*01826a49SYabin Cui 
1254*01826a49SYabin Cui /*_*******************************************************
1255*01826a49SYabin Cui *  Dictionary Helper Functions
1256*01826a49SYabin Cui *********************************************************/
1257*01826a49SYabin Cui /* returns 0 if successful, otherwise returns 1 upon error */
genRandomDict(U32 dictID,U32 seed,size_t dictSize,BYTE * fullDict)1258*01826a49SYabin Cui static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
1259*01826a49SYabin Cui {
1260*01826a49SYabin Cui     /* allocate space for samples */
1261*01826a49SYabin Cui     int ret = 0;
1262*01826a49SYabin Cui     unsigned const numSamples = 4;
1263*01826a49SYabin Cui     size_t sampleSizes[4];
1264*01826a49SYabin Cui     BYTE* const samples = malloc(5000*sizeof(BYTE));
1265*01826a49SYabin Cui     if (samples == NULL) {
1266*01826a49SYabin Cui         DISPLAY("Error: could not allocate space for samples\n");
1267*01826a49SYabin Cui         return 1;
1268*01826a49SYabin Cui     }
1269*01826a49SYabin Cui 
1270*01826a49SYabin Cui     /* generate samples */
1271*01826a49SYabin Cui     {   unsigned literalValue = 1;
1272*01826a49SYabin Cui         unsigned samplesPos = 0;
1273*01826a49SYabin Cui         size_t currSize = 1;
1274*01826a49SYabin Cui         while (literalValue <= 4) {
1275*01826a49SYabin Cui             sampleSizes[literalValue - 1] = currSize;
1276*01826a49SYabin Cui             {   size_t k;
1277*01826a49SYabin Cui                 for (k = 0; k < currSize; k++) {
1278*01826a49SYabin Cui                     *(samples + (samplesPos++)) = (BYTE)literalValue;
1279*01826a49SYabin Cui             }   }
1280*01826a49SYabin Cui             literalValue++;
1281*01826a49SYabin Cui             currSize *= 16;
1282*01826a49SYabin Cui     }   }
1283*01826a49SYabin Cui 
1284*01826a49SYabin Cui     {   size_t dictWriteSize = 0;
1285*01826a49SYabin Cui         ZDICT_params_t zdictParams;
1286*01826a49SYabin Cui         size_t const headerSize = MAX(dictSize/4, 256);
1287*01826a49SYabin Cui         size_t const dictContentSize = dictSize - headerSize;
1288*01826a49SYabin Cui         BYTE* const dictContent = fullDict + headerSize;
1289*01826a49SYabin Cui         if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
1290*01826a49SYabin Cui             DISPLAY("Error: dictionary size is too small\n");
1291*01826a49SYabin Cui             ret = 1;
1292*01826a49SYabin Cui             goto exitGenRandomDict;
1293*01826a49SYabin Cui         }
1294*01826a49SYabin Cui 
1295*01826a49SYabin Cui         /* init dictionary params */
1296*01826a49SYabin Cui         memset(&zdictParams, 0, sizeof(zdictParams));
1297*01826a49SYabin Cui         zdictParams.dictID = dictID;
1298*01826a49SYabin Cui         zdictParams.notificationLevel = 1;
1299*01826a49SYabin Cui 
1300*01826a49SYabin Cui         /* fill in dictionary content */
1301*01826a49SYabin Cui         RAND_buffer(&seed, (void*)dictContent, dictContentSize);
1302*01826a49SYabin Cui 
1303*01826a49SYabin Cui         /* finalize dictionary with random samples */
1304*01826a49SYabin Cui         dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
1305*01826a49SYabin Cui                                     dictContent, dictContentSize,
1306*01826a49SYabin Cui                                     samples, sampleSizes, numSamples,
1307*01826a49SYabin Cui                                     zdictParams);
1308*01826a49SYabin Cui 
1309*01826a49SYabin Cui         if (ZDICT_isError(dictWriteSize)) {
1310*01826a49SYabin Cui             DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
1311*01826a49SYabin Cui             ret = 1;
1312*01826a49SYabin Cui         }
1313*01826a49SYabin Cui     }
1314*01826a49SYabin Cui 
1315*01826a49SYabin Cui exitGenRandomDict:
1316*01826a49SYabin Cui     free(samples);
1317*01826a49SYabin Cui     return ret;
1318*01826a49SYabin Cui }
1319*01826a49SYabin Cui 
initDictInfo(int useDict,size_t dictContentSize,BYTE * dictContent,U32 dictID)1320*01826a49SYabin Cui static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
1321*01826a49SYabin Cui     /* allocate space statically */
1322*01826a49SYabin Cui     dictInfo dictOp;
1323*01826a49SYabin Cui     memset(&dictOp, 0, sizeof(dictOp));
1324*01826a49SYabin Cui     dictOp.useDict = useDict;
1325*01826a49SYabin Cui     dictOp.dictContentSize = dictContentSize;
1326*01826a49SYabin Cui     dictOp.dictContent = dictContent;
1327*01826a49SYabin Cui     dictOp.dictID = dictID;
1328*01826a49SYabin Cui     return dictOp;
1329*01826a49SYabin Cui }
1330*01826a49SYabin Cui 
1331*01826a49SYabin Cui /*-*******************************************************
1332*01826a49SYabin Cui *  Test Mode
1333*01826a49SYabin Cui *********************************************************/
1334*01826a49SYabin Cui 
/* Output buffer shared by the decode tests of this section :
 * each test decompresses into it, then compares its beginning
 * with the generated source content. */
1335*01826a49SYabin Cui BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
1336*01826a49SYabin Cui 
/* Decodes the generated data [fr->dataStart, fr->data) with the simple API
 * (ZSTD_decompress) into DECOMPRESSED_BUFFER, then compares the output with
 * the generated content [fr->srcStart, fr->src).
 * @return : the value returned by ZSTD_decompress on success,
 *           the decoder's error code if decoding failed,
 *           or ERROR(corruption_detected) if the content differs. */
static size_t testDecodeSimple(frame_t* fr)
{
    size_t const cSize  = (BYTE*)fr->data - (BYTE*)fr->dataStart;
    size_t const srcLen = (BYTE*)fr->src - (BYTE*)fr->srcStart;
    size_t const dSize  = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
                                          fr->dataStart, cSize);

    if (ZSTD_isError(dSize)) return dSize;

    if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, srcLen) != 0) {
        return ERROR(corruption_detected);
    }

    return dSize;
}
1352*01826a49SYabin Cui 
/* Decodes the generated data [fr->dataStart, fr->data) with the streaming API
 * into DECOMPRESSED_BUFFER, growing the output window step by step so that
 * decoding is done in chunks, then compares the output with the generated
 * content starting at fr->srcStart.
 * @return : the number of bytes decoded on success,
 *           ERROR(memory_allocation) if the stream cannot be created,
 *           the decoder's error code if initialization or decoding failed,
 *           or ERROR(corruption_detected) if the content differs.
 * The decompression stream is released on every path. */
static size_t testDecodeStreaming(frame_t* fr)
{
    ZSTD_DStream* zd = ZSTD_createDStream();
    ZSTD_inBuffer in;
    ZSTD_outBuffer out;
    size_t ret;

    if (!zd) return ERROR(memory_allocation);

    in.src = fr->dataStart;
    in.pos = 0;
    in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;

    out.dst = DECOMPRESSED_BUFFER;
    out.pos = 0;
    out.size = ZSTD_DStreamOutSize();

    ret = ZSTD_initDStream(zd);
    if (ZSTD_isError(ret)) goto cleanup; /* initialization failed */

    while (1) {
        ret = ZSTD_decompressStream(zd, &out, &in);
        if (ZSTD_isError(ret)) goto cleanup; /* error */
        if (ret == 0) break; /* frame is done */

        /* force decoding to be done in chunks */
        out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
    }

    ret = out.pos;

    if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
        /* report through `ret`, so that the stream still gets freed below */
        ret = ERROR(corruption_detected);
    }

cleanup:
    ZSTD_freeDStream(zd);
    return ret;
}
1391*01826a49SYabin Cui 
/* Generates a random dictionary, then a frame (genType == gt_frame) or a
 * single compressed block (any other genType) that references it, decodes
 * the result with the dictionary loaded, and compares the output with the
 * generated content.
 * @return : 0 on success,
 *           ERROR(GENERIC) if the dictionary cannot be allocated or generated,
 *           ERROR(memory_allocation) if the decompression context cannot be created,
 *           the decoder's error code if decoding failed,
 *           or ERROR(corruption_detected) if the content differs.
 * The dictionary buffer and the decompression context are released on every path. */
static size_t testDecodeWithDict(U32 seed, genType_e genType)
{
    /* create variables */
    size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
    U32 const dictID = RAND(&seed);
    size_t errorDetected = 0;
    ZSTD_DCtx* dctx = NULL;
    BYTE* const fullDict = malloc(dictSize);
    if (fullDict == NULL) {
        return ERROR(GENERIC);
    }

    /* generate random dictionary */
    if (genRandomDict(dictID, seed, dictSize, fullDict)) {  /* return 0 on success */
        errorDetected = ERROR(GENERIC);
        goto dictTestCleanup;
    }

    dctx = ZSTD_createDCtx();
    if (dctx == NULL) {
        /* the decoding entry points below must not receive a NULL context */
        errorDetected = ERROR(memory_allocation);
        goto dictTestCleanup;
    }

    {   frame_t fr;
        dictInfo info;
        size_t ret;

        /* get dict info : same header/content split as in genRandomDict() */
        {   size_t const headerSize = MAX(dictSize/4, 256);
            size_t const dictContentSize = dictSize-headerSize;
            BYTE* const dictContent = fullDict+headerSize;
            info = initDictInfo(1, dictContentSize, dictContent, dictID);
        }

        /* manually decompress and check difference */
        if (genType == gt_frame) {
            /* Test frame */
            generateFrame(seed, &fr, info);
            ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
                                            fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
                                            fullDict, dictSize);
        } else {
            /* Test block */
            generateCompressedBlock(seed, &fr, info);
            ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
            if (!ZSTD_isError(ret)) {
                ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
                                           fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
            }
        }

        if (ZSTD_isError(ret)) {
            errorDetected = ret;
            goto dictTestCleanup;
        }

        if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
            errorDetected = ERROR(corruption_detected);
        }
    }

dictTestCleanup:
    ZSTD_freeDCtx(dctx);   /* single release point; accepts NULL */
    free(fullDict);
    return errorDetected;
}
1458*01826a49SYabin Cui 
/* Decodes the headerless compressed block [fr->dataStart, fr->data) with the
 * block-level API into DECOMPRESSED_BUFFER, then compares the output with
 * the generated content [fr->srcStart, fr->src).
 * @return : the value returned by the block decoder on success,
 *           ERROR(memory_allocation) if the decompression context cannot be created,
 *           the decoder's error code if initialization or decoding failed,
 *           or ERROR(corruption_detected) if the content differs.
 * The decompression context is released on every path. */
static size_t testDecodeRawBlock(frame_t* fr)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t ret;

    if (dctx == NULL) return ERROR(memory_allocation);

    ret = ZSTD_decompressBegin(dctx);
    if (!ZSTD_isError(ret)) {
        ret = ZSTD_decompressBlock_deprecated(
                dctx,
                DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
                fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
    }
    /* free before inspecting `ret`, so that no error path leaks the context */
    ZSTD_freeDCtx(dctx);
    if (ZSTD_isError(ret)) return ret;

    if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
               (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
        return ERROR(corruption_detected);
    }

    return ret;
}
1479*01826a49SYabin Cui 
/* Runs one round of block-mode testing :
 * generates a compressed block without dictionary and decodes it,
 * then runs the dictionary test in block mode.
 * `*seed` is replaced by the seed resulting from block generation.
 * @return : 0 if both tests pass, 1 (after displaying a message
 *           quoting the initial seed) as soon as one fails. */
static int runBlockTest(U32* seed)
{
    U32 const initialSeed = *seed;
    frame_t fr;
    size_t err;

    *seed = generateCompressedBlock(*seed, &fr, initDictInfo(0, 0, NULL, 0));

    err = testDecodeRawBlock(&fr);
    if (ZSTD_isError(err)) {
        DISPLAY("Error in block mode on test seed %u: %s\n",
                (unsigned)initialSeed, ZSTD_getErrorName(err));
        return 1;
    }

    err = testDecodeWithDict(*seed, gt_block);
    if (ZSTD_isError(err)) {
        DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
                (unsigned)initialSeed, ZSTD_getErrorName(err));
        return 1;
    }

    return 0;
}
1505*01826a49SYabin Cui 
/* Runs one round of frame-mode testing :
 * generates a frame without dictionary, decodes it with the simple API and
 * with the streaming API, then runs the dictionary test in frame mode.
 * `*seed` is replaced by the seed resulting from frame generation.
 * @return : 0 if all tests pass, 1 (after displaying a message
 *           quoting the initial seed) as soon as one fails. */
static int runFrameTest(U32* seed)
{
    U32 const initialSeed = *seed;
    frame_t fr;
    size_t err;

    *seed = generateFrame(*seed, &fr, initDictInfo(0, 0, NULL, 0));

    err = testDecodeSimple(&fr);
    if (ZSTD_isError(err)) {
        DISPLAY("Error in simple mode on test seed %u: %s\n",
                (unsigned)initialSeed, ZSTD_getErrorName(err));
        return 1;
    }

    err = testDecodeStreaming(&fr);
    if (ZSTD_isError(err)) {
        DISPLAY("Error in streaming mode on test seed %u: %s\n",
                (unsigned)initialSeed, ZSTD_getErrorName(err));
        return 1;
    }

    err = testDecodeWithDict(*seed, gt_frame);
    if (ZSTD_isError(err)) {
        DISPLAY("Error in dictionary mode on test seed %u: %s\n",
                (unsigned)initialSeed, ZSTD_getErrorName(err));
        return 1;
    }

    return 0;
}
1537*01826a49SYabin Cui 
/* Test driver : keeps running frame tests (genType == gt_frame) or block
 * tests (otherwise) as long as fewer than `numFiles` tests were run
 * or less than `testDurationS` seconds have elapsed.
 * When both limits are 0, exactly one test is run.
 * @return : 0 if every test passes, otherwise the non-zero result
 *           of the first failing test. */
static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
                       genType_e genType)
{
    UTIL_time_t const startClock = UTIL_getTime();
    U64 const maxClockSpan = testDurationS * SEC_TO_MICRO;
    unsigned fnum = 0;

    if (numFiles == 0 && !testDurationS) numFiles = 1;

    DISPLAY("seed: %u\n", (unsigned)seed);

    while (fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan) {
        int ret;

        /* progress : "done/total" while counting files, plain counter once time-bound */
        if (fnum < numFiles) {
            DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
        } else {
            DISPLAYUPDATE("\r%u           ", fnum);
        }

        if (genType == gt_frame) {
            ret = runFrameTest(&seed);
        } else {
            ret = runBlockTest(&seed);
        }
        if (ret) return ret;

        fnum++;
    }

    DISPLAY("\r%u tests completed: ", fnum);
    DISPLAY("OK\n");

    return 0;
}
1568*01826a49SYabin Cui 
1569*01826a49SYabin Cui /*-*******************************************************
1570*01826a49SYabin Cui *  File I/O
1571*01826a49SYabin Cui *********************************************************/
1572*01826a49SYabin Cui 
/* generateFile() :
 * Generates one frame (genType == gt_frame) or one compressed block
 * (otherwise) from `seed`, without dictionary, and writes the generated
 * bytes [dataStart, data) to `path`.
 * When `origPath` is not NULL, the source bytes [srcStart, src) are written
 * to `origPath` as well.
 * @return : always 0.
 */
static int generateFile(U32 seed, const char* const path,
                        const char* const origPath, genType_e genType)
{
    frame_t frame;

    DISPLAY("seed: %u\n", (unsigned)seed);

    {   dictInfo const noDict = initDictInfo(0, 0, NULL, 0);
        if (genType != gt_frame) {
            generateCompressedBlock(seed, &frame, noDict);
        } else {
            generateFrame(seed, &frame, noDict);
        }
    }

    /* generated (compressed) output */
    {   ptrdiff_t const compressedSize = (BYTE*)frame.data - (BYTE*)frame.dataStart;
        outputBuffer(frame.dataStart, compressedSize, path);
    }

    /* optional copy of the original content */
    if (origPath != NULL) {
        ptrdiff_t const originalSize = (BYTE*)frame.src - (BYTE*)frame.srcStart;
        outputBuffer(frame.srcStart, originalSize, origPath);
    }

    return 0;
}
1593*01826a49SYabin Cui 
/* generateCorpus() :
 * Generates `numFiles` frames (genType == gt_frame) or compressed blocks
 * (otherwise), without dictionary, chaining the seed returned by each
 * generation into the next one.
 * File number N is written to "<path>/zNNNNNN.zst"; when `origPath` is not
 * NULL, its source content is written to "<origPath>/zNNNNNN".
 * @return : 0 on success, 1 if an output file name cannot be built within
 *           MAX_PATH bytes.
 */
static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
                          const char* const origPath, genType_e genType)
{
    char outPath[MAX_PATH];
    unsigned fnum;

    DISPLAY("seed: %u\n", (unsigned)seed);

    for (fnum = 0; fnum < numFiles; fnum++) {
        frame_t fr;

        DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);

        {   dictInfo const info = initDictInfo(0, 0, NULL, 0);
            if (genType == gt_frame) {
                seed = generateFrame(seed, &fr, info);
            } else {
                seed = generateCompressedBlock(seed, &fr, info);
            }
        }

        /* snprintf() returns a negative value on error (and on truncation
         * with some pre-C99 runtimes) : treat that as a failure too,
         * instead of using an unusable outPath */
        {   int const len = snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum);
            if (len < 0 || len >= (int)MAX_PATH) {
                DISPLAY("Error: path too long\n");
                return 1;
            }
        }
        outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);

        if (origPath) {
            int const len = snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum);
            if (len < 0 || len >= (int)MAX_PATH) {
                DISPLAY("Error: path too long\n");
                return 1;
            }
            outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
        }
    }

    DISPLAY("\r%u/%u      \n", fnum, numFiles);

    return 0;
}
1634*01826a49SYabin Cui 
/* generateCorpusWithDict() :
 * Generates a random dictionary of `dictSize` bytes, writes it out, then
 * generates frames (genType == gt_frame) or compressed blocks (otherwise)
 * that reference the content part of that dictionary.
 *
 * numFiles != 0 : `path` and `origPath` are directories. The dictionary goes
 *                 to "<path>/dictionary", file N to "<path>/zNNNNNN.zst" and,
 *                 when `origPath` is not NULL, its source to "<origPath>/zNNNNNN".
 * numFiles == 0 : a single file is generated. The dictionary goes to
 *                 "dictionary" (relative path), the generated data to `path`
 *                 and, when `origPath` is not NULL, the source to `origPath`.
 *
 * The first MAX(dictSize/4, 256) bytes of the dictionary are treated as its
 * header; the remainder is the content handed to the generator.
 * @return : 0 on success, non-zero on error (dictionary too small, allocation
 *           failure, dictionary generation failure, or output path too long).
 */
static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
                                  const char* const origPath, const size_t dictSize,
                                  genType_e genType)
{
    char outPath[MAX_PATH];
    BYTE* fullDict;
    U32 const dictID = RAND(&seed);
    size_t const headerSize = MAX(dictSize/4, 256);
    int errorDetected = 0;

    /* `dictSize - headerSize` below would wrap around, and `fullDict + headerSize`
     * would point outside the allocation : reject such sizes here rather than
     * depending on genRandomDict() to do it */
    if (dictSize < headerSize) {
        DISPLAY("Error: dictionary size is too small\n");
        return 1;
    }

    /* allocate space for the dictionary */
    fullDict = malloc(dictSize);
    if (fullDict == NULL) {
        DISPLAY("Error: could not allocate space for full dictionary.\n");
        return 1;
    }

    /* randomly generate the dictionary */
    {   int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
        if (ret != 0) {
            errorDetected = ret;
            goto dictCleanup;
        }
    }

    /* write out dictionary */
    if (numFiles != 0) {
        /* the "<path>/dictionary" name is only needed (and only validated)
         * in directory mode; a negative snprintf() result is an error too */
        int const len = snprintf(outPath, MAX_PATH, "%s/dictionary", path);
        if (len < 0 || len >= (int)MAX_PATH) {
            DISPLAY("Error: dictionary path too long\n");
            errorDetected = 1;
            goto dictCleanup;
        }
        outputBuffer(fullDict, dictSize, outPath);
    }
    else {
        outputBuffer(fullDict, dictSize, "dictionary");
    }

    /* generate random compressed/decompressed files */
    {   size_t const dictContentSize = dictSize - headerSize;
        BYTE* const dictContent = fullDict + headerSize;
        unsigned fnum;
        /* numFiles == 0 still produces one file */
        for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
            frame_t fr;
            DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
            {   dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
                if (genType == gt_frame) {
                    seed = generateFrame(seed, &fr, info);
                } else {
                    seed = generateCompressedBlock(seed, &fr, info);
                }
            }

            if (numFiles != 0) {
                {   int const len = snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum);
                    if (len < 0 || len >= (int)MAX_PATH) {
                        DISPLAY("Error: path too long\n");
                        errorDetected = 1;
                        goto dictCleanup;
                    }
                }
                outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);

                if (origPath) {
                    int const len = snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum);
                    if (len < 0 || len >= (int)MAX_PATH) {
                        DISPLAY("Error: path too long\n");
                        errorDetected = 1;
                        goto dictCleanup;
                    }
                    outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
                }
            }
            else {
                outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
                if (origPath) {
                    outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
                }
            }
        }
    }

dictCleanup:
    free(fullDict);
    return errorDetected;
}
1724*01826a49SYabin Cui 
1725*01826a49SYabin Cui 
1726*01826a49SYabin Cui /*_*******************************************************
1727*01826a49SYabin Cui *  Command line
1728*01826a49SYabin Cui *********************************************************/
makeSeed(void)1729*01826a49SYabin Cui static U32 makeSeed(void)
1730*01826a49SYabin Cui {
1731*01826a49SYabin Cui     U32 t = (U32) time(NULL);
1732*01826a49SYabin Cui     return XXH32(&t, sizeof(t), 0) % 65536;
1733*01826a49SYabin Cui }
1734*01826a49SYabin Cui 
/* readInt() :
 * Reads the run of decimal digits at the start of `*argument`.
 * `*argument` is advanced to the first non-digit character.
 * @return : the decimal value read, 0 when there is no leading digit.
 * Note : no overflow check, the value wraps around like any unsigned.
 */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned total = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        total = total * 10 + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return total;
}
1745*01826a49SYabin Cui 
/* usage() :
 * Prints the short help text, with `programName` as the invoked command.
 * Note : -p has no default, main() refuses to generate files without it,
 * so the help text states that it is required outside of test mode.
 */
static void usage(const char* programName)
{
    DISPLAY( "Usage :\n");
    DISPLAY( "      %s [args]\n", programName);
    DISPLAY( "\n");
    DISPLAY( "Arguments :\n");
    DISPLAY( " -p<path> : select output path (required, except in test mode -t)\n");
    DISPLAY( "                in multiple files mode this should be a directory\n");
    DISPLAY( " -o<path> : select path to output original file (default:no output)\n");
    DISPLAY( "                in multiple files mode this should be a directory\n");
    DISPLAY( " -s#      : select seed (default:random based on time)\n");
    DISPLAY( " -n#      : number of files to generate (default:1)\n");
    DISPLAY( " -t       : activate test mode (test files against libzstd instead of outputting them)\n");
    DISPLAY( " -T#      : length of time to run tests for\n");
    DISPLAY( " -v       : increase verbosity level (default:0, max:7)\n");
    DISPLAY( " -h/H     : display help/long help and exit\n");
}
1763*01826a49SYabin Cui 
/* advancedUsage() :
 * Prints the short help (see usage()) followed by the description of the
 * long "--" options.
 */
static void advancedUsage(const char* programName)
{
    static const char* const advancedHelp[] = {
        "\n",
        "Advanced arguments        :\n",
        " --content-size           : always include the content size in the frame header\n",
        " --use-dict=#             : include a dictionary used to decompress the corpus\n",
        " --gen-blocks             : generate raw compressed blocks without block/frame headers\n",
        " --max-block-size-log=#   : max block size log, must be in range [2, 17]\n",
        " --max-content-size-log=# : max content size log, must be <= 20\n",
        "                            (this is ignored with gen-blocks)\n"
    };
    size_t const nbLines = sizeof(advancedHelp) / sizeof(advancedHelp[0]);
    size_t lineNb;

    usage(programName);
    for (lineNb = 0; lineNb < nbLines; lineNb++) {
        DISPLAY( "%s", advancedHelp[lineNb]);
    }
}
1776*01826a49SYabin Cui 
/*! readU32FromChar() :
    Reads a decimal number at the start of `*stringPtr`, optionally followed
    by a size suffix : K, KB, KiB (value * 1024) or M, MB, MiB (value * 1024 * 1024).
    `*stringPtr` is advanced past everything that was consumed.
    @return : the unsigned value read, 0 when there is no leading digit.
    Note : there is no overflow check, the result wraps if it exceeds UINT_MAX */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* p = *stringPtr;
    unsigned value = 0;

    while ((*p >= '0') && (*p <= '9')) {
        value = value * 10 + (unsigned)(*p - '0');
        p++;
    }

    if ((*p == 'K') || (*p == 'M')) {
        unsigned const shift = (*p == 'M') ? 20 : 10;
        value <<= shift;
        p++;
        /* optional "i" and "B" after the multiplier letter */
        if (*p == 'i') p++;
        if (*p == 'B') p++;
    }

    *stringPtr = p;
    return value;
}
1796*01826a49SYabin Cui 
/** longCommandWArg() :
 *  checks whether *stringPtr starts with longCommand.
 *  On a match, *stringPtr is moved to the character right after the
 *  longCommand prefix and the function @return 1.
 *  Otherwise *stringPtr is left untouched and the function @return 0.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }

    *stringPtr += prefixLen;
    return 1;
}
1809*01826a49SYabin Cui 
main(int argc,char ** argv)1810*01826a49SYabin Cui int main(int argc, char** argv)
1811*01826a49SYabin Cui {
1812*01826a49SYabin Cui     U32 seed = 0;
1813*01826a49SYabin Cui     int seedset = 0;
1814*01826a49SYabin Cui     unsigned numFiles = 0;
1815*01826a49SYabin Cui     unsigned testDuration = 0;
1816*01826a49SYabin Cui     int testMode = 0;
1817*01826a49SYabin Cui     const char* path = NULL;
1818*01826a49SYabin Cui     const char* origPath = NULL;
1819*01826a49SYabin Cui     int useDict = 0;
1820*01826a49SYabin Cui     unsigned dictSize = (10 << 10); /* 10 kB default */
1821*01826a49SYabin Cui     genType_e genType = gt_frame;
1822*01826a49SYabin Cui 
1823*01826a49SYabin Cui     int argNb;
1824*01826a49SYabin Cui 
1825*01826a49SYabin Cui     /* Check command line */
1826*01826a49SYabin Cui     for (argNb=1; argNb<argc; argNb++) {
1827*01826a49SYabin Cui         const char* argument = argv[argNb];
1828*01826a49SYabin Cui         if(!argument) continue;   /* Protection if argument empty */
1829*01826a49SYabin Cui 
1830*01826a49SYabin Cui         /* Handle commands. Aggregated commands are allowed */
1831*01826a49SYabin Cui         if (argument[0]=='-') {
1832*01826a49SYabin Cui             argument++;
1833*01826a49SYabin Cui             while (*argument!=0) {
1834*01826a49SYabin Cui                 switch(*argument)
1835*01826a49SYabin Cui                 {
1836*01826a49SYabin Cui                 case 'h':
1837*01826a49SYabin Cui                     usage(argv[0]);
1838*01826a49SYabin Cui                     return 0;
1839*01826a49SYabin Cui                 case 'H':
1840*01826a49SYabin Cui                     advancedUsage(argv[0]);
1841*01826a49SYabin Cui                     return 0;
1842*01826a49SYabin Cui                 case 'v':
1843*01826a49SYabin Cui                     argument++;
1844*01826a49SYabin Cui                     g_displayLevel++;
1845*01826a49SYabin Cui                     break;
1846*01826a49SYabin Cui                 case 's':
1847*01826a49SYabin Cui                     argument++;
1848*01826a49SYabin Cui                     seedset=1;
1849*01826a49SYabin Cui                     seed = readInt(&argument);
1850*01826a49SYabin Cui                     break;
1851*01826a49SYabin Cui                 case 'n':
1852*01826a49SYabin Cui                     argument++;
1853*01826a49SYabin Cui                     numFiles = readInt(&argument);
1854*01826a49SYabin Cui                     break;
1855*01826a49SYabin Cui                 case 'T':
1856*01826a49SYabin Cui                     argument++;
1857*01826a49SYabin Cui                     testDuration = readInt(&argument);
1858*01826a49SYabin Cui                     if (*argument == 'm') {
1859*01826a49SYabin Cui                         testDuration *= 60;
1860*01826a49SYabin Cui                         argument++;
1861*01826a49SYabin Cui                         if (*argument == 'n') argument++;
1862*01826a49SYabin Cui                     }
1863*01826a49SYabin Cui                     break;
1864*01826a49SYabin Cui                 case 'o':
1865*01826a49SYabin Cui                     argument++;
1866*01826a49SYabin Cui                     origPath = argument;
1867*01826a49SYabin Cui                     argument += strlen(argument);
1868*01826a49SYabin Cui                     break;
1869*01826a49SYabin Cui                 case 'p':
1870*01826a49SYabin Cui                     argument++;
1871*01826a49SYabin Cui                     path = argument;
1872*01826a49SYabin Cui                     argument += strlen(argument);
1873*01826a49SYabin Cui                     break;
1874*01826a49SYabin Cui                 case 't':
1875*01826a49SYabin Cui                     argument++;
1876*01826a49SYabin Cui                     testMode = 1;
1877*01826a49SYabin Cui                     break;
1878*01826a49SYabin Cui                 case '-':
1879*01826a49SYabin Cui                     argument++;
1880*01826a49SYabin Cui                     if (strcmp(argument, "content-size") == 0) {
1881*01826a49SYabin Cui                         opts.contentSize = 1;
1882*01826a49SYabin Cui                     } else if (longCommandWArg(&argument, "use-dict=")) {
1883*01826a49SYabin Cui                         dictSize = readU32FromChar(&argument);
1884*01826a49SYabin Cui                         useDict = 1;
1885*01826a49SYabin Cui                     } else if (strcmp(argument, "gen-blocks") == 0) {
1886*01826a49SYabin Cui                         genType = gt_block;
1887*01826a49SYabin Cui                     } else if (longCommandWArg(&argument, "max-block-size-log=")) {
1888*01826a49SYabin Cui                         U32 value = readU32FromChar(&argument);
1889*01826a49SYabin Cui                         if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) {
1890*01826a49SYabin Cui                             g_maxBlockSize = 1U << value;
1891*01826a49SYabin Cui                         }
1892*01826a49SYabin Cui                     } else if (longCommandWArg(&argument, "max-content-size-log=")) {
1893*01826a49SYabin Cui                         U32 value = readU32FromChar(&argument);
1894*01826a49SYabin Cui                         g_maxDecompressedSizeLog =
1895*01826a49SYabin Cui                                 MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
1896*01826a49SYabin Cui                     } else {
1897*01826a49SYabin Cui                         advancedUsage(argv[0]);
1898*01826a49SYabin Cui                         return 1;
1899*01826a49SYabin Cui                     }
1900*01826a49SYabin Cui                     argument += strlen(argument);
1901*01826a49SYabin Cui                     break;
1902*01826a49SYabin Cui                 default:
1903*01826a49SYabin Cui                     usage(argv[0]);
1904*01826a49SYabin Cui                     return 1;
1905*01826a49SYabin Cui     }   }   }   }   /* for (argNb=1; argNb<argc; argNb++) */
1906*01826a49SYabin Cui 
1907*01826a49SYabin Cui     if (!seedset) {
1908*01826a49SYabin Cui         seed = makeSeed();
1909*01826a49SYabin Cui     }
1910*01826a49SYabin Cui 
1911*01826a49SYabin Cui     if (testMode) {
1912*01826a49SYabin Cui         return runTestMode(seed, numFiles, testDuration, genType);
1913*01826a49SYabin Cui     } else {
1914*01826a49SYabin Cui         if (testDuration) {
1915*01826a49SYabin Cui             DISPLAY("Error: -T requires test mode (-t)\n\n");
1916*01826a49SYabin Cui             usage(argv[0]);
1917*01826a49SYabin Cui             return 1;
1918*01826a49SYabin Cui         }
1919*01826a49SYabin Cui     }
1920*01826a49SYabin Cui 
1921*01826a49SYabin Cui     if (!path) {
1922*01826a49SYabin Cui         DISPLAY("Error: path is required in file generation mode\n");
1923*01826a49SYabin Cui         usage(argv[0]);
1924*01826a49SYabin Cui         return 1;
1925*01826a49SYabin Cui     }
1926*01826a49SYabin Cui 
1927*01826a49SYabin Cui     if (numFiles == 0 && useDict == 0) {
1928*01826a49SYabin Cui         return generateFile(seed, path, origPath, genType);
1929*01826a49SYabin Cui     } else if (useDict == 0){
1930*01826a49SYabin Cui         return generateCorpus(seed, numFiles, path, origPath, genType);
1931*01826a49SYabin Cui     } else {
1932*01826a49SYabin Cui         /* should generate files with a dictionary */
1933*01826a49SYabin Cui         return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
1934*01826a49SYabin Cui     }
1935*01826a49SYabin Cui 
1936*01826a49SYabin Cui }
1937