1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
/* zstd_ddict.c :
 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13*01826a49SYabin Cui
14*01826a49SYabin Cui /*-*******************************************************
15*01826a49SYabin Cui * Dependencies
16*01826a49SYabin Cui *********************************************************/
17*01826a49SYabin Cui #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
18*01826a49SYabin Cui #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
19*01826a49SYabin Cui #include "../common/cpu.h" /* bmi2 */
20*01826a49SYabin Cui #include "../common/mem.h" /* low level memory routines */
21*01826a49SYabin Cui #define FSE_STATIC_LINKING_ONLY
22*01826a49SYabin Cui #include "../common/fse.h"
23*01826a49SYabin Cui #include "../common/huf.h"
24*01826a49SYabin Cui #include "zstd_decompress_internal.h"
25*01826a49SYabin Cui #include "zstd_ddict.h"
26*01826a49SYabin Cui
27*01826a49SYabin Cui #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28*01826a49SYabin Cui # include "../legacy/zstd_legacy.h"
29*01826a49SYabin Cui #endif
30*01826a49SYabin Cui
31*01826a49SYabin Cui
32*01826a49SYabin Cui
33*01826a49SYabin Cui /*-*******************************************************
34*01826a49SYabin Cui * Types
35*01826a49SYabin Cui *********************************************************/
36*01826a49SYabin Cui struct ZSTD_DDict_s {
37*01826a49SYabin Cui void* dictBuffer;
38*01826a49SYabin Cui const void* dictContent;
39*01826a49SYabin Cui size_t dictSize;
40*01826a49SYabin Cui ZSTD_entropyDTables_t entropy;
41*01826a49SYabin Cui U32 dictID;
42*01826a49SYabin Cui U32 entropyPresent;
43*01826a49SYabin Cui ZSTD_customMem cMem;
44*01826a49SYabin Cui }; /* typedef'd to ZSTD_DDict within "zstd.h" */
45*01826a49SYabin Cui
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46*01826a49SYabin Cui const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47*01826a49SYabin Cui {
48*01826a49SYabin Cui assert(ddict != NULL);
49*01826a49SYabin Cui return ddict->dictContent;
50*01826a49SYabin Cui }
51*01826a49SYabin Cui
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52*01826a49SYabin Cui size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53*01826a49SYabin Cui {
54*01826a49SYabin Cui assert(ddict != NULL);
55*01826a49SYabin Cui return ddict->dictSize;
56*01826a49SYabin Cui }
57*01826a49SYabin Cui
/*! ZSTD_copyDDictParameters() :
 * Make `dctx` use the dictionary held by `ddict` :
 * - window pointers are set so that the dictionary content acts as the prefix,
 * - when `ddict` carries entropy tables, `dctx` table pointers reference them
 *   (tables are not duplicated) and the 3 repcodes are copied.
 * Both arguments must be non-NULL (only enforced through assert). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and window boundaries */
    {   const void* const dictStart = ddict->dictContent;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = (const BYTE*)dictStart + ddict->dictSize;
        dctx->previousDstEnd = dctx->dictEnd;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* content-only dictionary : nothing else to transfer */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* full dictionary : reference its tables, import its repcodes */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
        }
    }
}
87*01826a49SYabin Cui
88*01826a49SYabin Cui
89*01826a49SYabin Cui static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)90*01826a49SYabin Cui ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
91*01826a49SYabin Cui ZSTD_dictContentType_e dictContentType)
92*01826a49SYabin Cui {
93*01826a49SYabin Cui ddict->dictID = 0;
94*01826a49SYabin Cui ddict->entropyPresent = 0;
95*01826a49SYabin Cui if (dictContentType == ZSTD_dct_rawContent) return 0;
96*01826a49SYabin Cui
97*01826a49SYabin Cui if (ddict->dictSize < 8) {
98*01826a49SYabin Cui if (dictContentType == ZSTD_dct_fullDict)
99*01826a49SYabin Cui return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
100*01826a49SYabin Cui return 0; /* pure content mode */
101*01826a49SYabin Cui }
102*01826a49SYabin Cui { U32 const magic = MEM_readLE32(ddict->dictContent);
103*01826a49SYabin Cui if (magic != ZSTD_MAGIC_DICTIONARY) {
104*01826a49SYabin Cui if (dictContentType == ZSTD_dct_fullDict)
105*01826a49SYabin Cui return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
106*01826a49SYabin Cui return 0; /* pure content mode */
107*01826a49SYabin Cui }
108*01826a49SYabin Cui }
109*01826a49SYabin Cui ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
110*01826a49SYabin Cui
111*01826a49SYabin Cui /* load entropy tables */
112*01826a49SYabin Cui RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
113*01826a49SYabin Cui &ddict->entropy, ddict->dictContent, ddict->dictSize)),
114*01826a49SYabin Cui dictionary_corrupted, "");
115*01826a49SYabin Cui ddict->entropyPresent = 1;
116*01826a49SYabin Cui return 0;
117*01826a49SYabin Cui }
118*01826a49SYabin Cui
119*01826a49SYabin Cui
/* ZSTD_initDDict_internal() :
 * Attach dictionary content to `ddict`, then parse it.
 * The content is duplicated with ddict->cMem only when loading by copy
 * a non-NULL, non-empty dictionary; otherwise `dict` is merely referenced
 * and ddict->dictBuffer is set to NULL.
 * ddict->cMem must be set by the caller before any by-copy load.
 * @return : 0 on success, or an error code
 *           (memory_allocation, or the error forwarded from entropy loading). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                      && (dict != NULL)
                      && (dictSize != 0);

    if (mustCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* fields are written before the NULL check, so that a failed init
         * leaves dictBuffer in a state that is safe to free */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
144*01826a49SYabin Cui
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)145*01826a49SYabin Cui ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
146*01826a49SYabin Cui ZSTD_dictLoadMethod_e dictLoadMethod,
147*01826a49SYabin Cui ZSTD_dictContentType_e dictContentType,
148*01826a49SYabin Cui ZSTD_customMem customMem)
149*01826a49SYabin Cui {
150*01826a49SYabin Cui if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
151*01826a49SYabin Cui
152*01826a49SYabin Cui { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
153*01826a49SYabin Cui if (ddict == NULL) return NULL;
154*01826a49SYabin Cui ddict->cMem = customMem;
155*01826a49SYabin Cui { size_t const initResult = ZSTD_initDDict_internal(ddict,
156*01826a49SYabin Cui dict, dictSize,
157*01826a49SYabin Cui dictLoadMethod, dictContentType);
158*01826a49SYabin Cui if (ZSTD_isError(initResult)) {
159*01826a49SYabin Cui ZSTD_freeDDict(ddict);
160*01826a49SYabin Cui return NULL;
161*01826a49SYabin Cui } }
162*01826a49SYabin Cui return ddict;
163*01826a49SYabin Cui }
164*01826a49SYabin Cui }
165*01826a49SYabin Cui
166*01826a49SYabin Cui /*! ZSTD_createDDict() :
167*01826a49SYabin Cui * Create a digested dictionary, to start decompression without startup delay.
168*01826a49SYabin Cui * `dict` content is copied inside DDict.
169*01826a49SYabin Cui * Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)170*01826a49SYabin Cui ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
171*01826a49SYabin Cui {
172*01826a49SYabin Cui ZSTD_customMem const allocator = { NULL, NULL, NULL };
173*01826a49SYabin Cui return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
174*01826a49SYabin Cui }
175*01826a49SYabin Cui
176*01826a49SYabin Cui /*! ZSTD_createDDict_byReference() :
177*01826a49SYabin Cui * Create a digested dictionary, to start decompression without startup delay.
178*01826a49SYabin Cui * Dictionary content is simply referenced, it will be accessed during decompression.
179*01826a49SYabin Cui * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)180*01826a49SYabin Cui ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
181*01826a49SYabin Cui {
182*01826a49SYabin Cui ZSTD_customMem const allocator = { NULL, NULL, NULL };
183*01826a49SYabin Cui return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
184*01826a49SYabin Cui }
185*01826a49SYabin Cui
186*01826a49SYabin Cui
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)187*01826a49SYabin Cui const ZSTD_DDict* ZSTD_initStaticDDict(
188*01826a49SYabin Cui void* sBuffer, size_t sBufferSize,
189*01826a49SYabin Cui const void* dict, size_t dictSize,
190*01826a49SYabin Cui ZSTD_dictLoadMethod_e dictLoadMethod,
191*01826a49SYabin Cui ZSTD_dictContentType_e dictContentType)
192*01826a49SYabin Cui {
193*01826a49SYabin Cui size_t const neededSpace = sizeof(ZSTD_DDict)
194*01826a49SYabin Cui + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
195*01826a49SYabin Cui ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
196*01826a49SYabin Cui assert(sBuffer != NULL);
197*01826a49SYabin Cui assert(dict != NULL);
198*01826a49SYabin Cui if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
199*01826a49SYabin Cui if (sBufferSize < neededSpace) return NULL;
200*01826a49SYabin Cui if (dictLoadMethod == ZSTD_dlm_byCopy) {
201*01826a49SYabin Cui ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
202*01826a49SYabin Cui dict = ddict+1;
203*01826a49SYabin Cui }
204*01826a49SYabin Cui if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
205*01826a49SYabin Cui dict, dictSize,
206*01826a49SYabin Cui ZSTD_dlm_byRef, dictContentType) ))
207*01826a49SYabin Cui return NULL;
208*01826a49SYabin Cui return ddict;
209*01826a49SYabin Cui }
210*01826a49SYabin Cui
211*01826a49SYabin Cui
/*! ZSTD_freeDDict() :
 *  Release the internal dictionary copy (if any), then the DDict itself,
 *  both through the allocator recorded in the DDict.
 *  Accepts NULL, in which case nothing happens.
 * @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    ZSTD_customMem cMem;
    if (ddict == NULL) return 0;   /* support free on NULL */
    cMem = ddict->cMem;            /* saved locally : ddict is released below */
    ZSTD_customFree(ddict->dictBuffer, cMem);
    ZSTD_customFree(ddict, cMem);
    return 0;
}
221*01826a49SYabin Cui
222*01826a49SYabin Cui /*! ZSTD_estimateDDictSize() :
223*01826a49SYabin Cui * Estimate amount of memory that will be needed to create a dictionary for decompression.
224*01826a49SYabin Cui * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)225*01826a49SYabin Cui size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
226*01826a49SYabin Cui {
227*01826a49SYabin Cui return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
228*01826a49SYabin Cui }
229*01826a49SYabin Cui
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)230*01826a49SYabin Cui size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
231*01826a49SYabin Cui {
232*01826a49SYabin Cui if (ddict==NULL) return 0; /* support sizeof on NULL */
233*01826a49SYabin Cui return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
234*01826a49SYabin Cui }
235*01826a49SYabin Cui
236*01826a49SYabin Cui /*! ZSTD_getDictID_fromDDict() :
237*01826a49SYabin Cui * Provides the dictID of the dictionary loaded into `ddict`.
238*01826a49SYabin Cui * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
239*01826a49SYabin Cui * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)240*01826a49SYabin Cui unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
241*01826a49SYabin Cui {
242*01826a49SYabin Cui if (ddict==NULL) return 0;
243*01826a49SYabin Cui return ddict->dictID;
244*01826a49SYabin Cui }
245