xref: /aosp_15_r20/external/zstd/lib/decompress/zstd_ddict.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui /* zstd_ddict.c :
12*01826a49SYabin Cui  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13*01826a49SYabin Cui 
14*01826a49SYabin Cui /*-*******************************************************
15*01826a49SYabin Cui *  Dependencies
16*01826a49SYabin Cui *********************************************************/
17*01826a49SYabin Cui #include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
18*01826a49SYabin Cui #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
19*01826a49SYabin Cui #include "../common/cpu.h"         /* bmi2 */
20*01826a49SYabin Cui #include "../common/mem.h"         /* low level memory routines */
21*01826a49SYabin Cui #define FSE_STATIC_LINKING_ONLY
22*01826a49SYabin Cui #include "../common/fse.h"
23*01826a49SYabin Cui #include "../common/huf.h"
24*01826a49SYabin Cui #include "zstd_decompress_internal.h"
25*01826a49SYabin Cui #include "zstd_ddict.h"
26*01826a49SYabin Cui 
27*01826a49SYabin Cui #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28*01826a49SYabin Cui #  include "../legacy/zstd_legacy.h"
29*01826a49SYabin Cui #endif
30*01826a49SYabin Cui 
31*01826a49SYabin Cui 
32*01826a49SYabin Cui 
33*01826a49SYabin Cui /*-*******************************************************
34*01826a49SYabin Cui *  Types
35*01826a49SYabin Cui *********************************************************/
36*01826a49SYabin Cui struct ZSTD_DDict_s {
37*01826a49SYabin Cui     void* dictBuffer;
38*01826a49SYabin Cui     const void* dictContent;
39*01826a49SYabin Cui     size_t dictSize;
40*01826a49SYabin Cui     ZSTD_entropyDTables_t entropy;
41*01826a49SYabin Cui     U32 dictID;
42*01826a49SYabin Cui     U32 entropyPresent;
43*01826a49SYabin Cui     ZSTD_customMem cMem;
44*01826a49SYabin Cui };  /* typedef'd to ZSTD_DDict within "zstd.h" */
45*01826a49SYabin Cui 
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46*01826a49SYabin Cui const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47*01826a49SYabin Cui {
48*01826a49SYabin Cui     assert(ddict != NULL);
49*01826a49SYabin Cui     return ddict->dictContent;
50*01826a49SYabin Cui }
51*01826a49SYabin Cui 
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52*01826a49SYabin Cui size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53*01826a49SYabin Cui {
54*01826a49SYabin Cui     assert(ddict != NULL);
55*01826a49SYabin Cui     return ddict->dictSize;
56*01826a49SYabin Cui }
57*01826a49SYabin Cui 
/*! ZSTD_copyDDictParameters() :
 *  Makes `dctx` use the dictionary held by `ddict` :
 *  dictID, content boundaries and, when `ddict` carries entropy,
 *  the entropy table pointers and the three `rep` values.
 *  Tables are referenced, not copied : `dctx` ends up pointing into `ddict`.
 *  Both arguments must be non-NULL (only checked through assert()). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and content window */
    {   const void* const dictStart = ddict->dictContent;
        const BYTE* const dictLimit = (const BYTE*)dictStart + ddict->dictSize;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = dictLimit;
        dctx->previousDstEnd = dictLimit;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* content-only dictionary : no entropy to share */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* full dictionary : reference its tables, copy its `rep` values */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
    }   }
}
87*01826a49SYabin Cui 
88*01826a49SYabin Cui 
89*01826a49SYabin Cui static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)90*01826a49SYabin Cui ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
91*01826a49SYabin Cui                            ZSTD_dictContentType_e dictContentType)
92*01826a49SYabin Cui {
93*01826a49SYabin Cui     ddict->dictID = 0;
94*01826a49SYabin Cui     ddict->entropyPresent = 0;
95*01826a49SYabin Cui     if (dictContentType == ZSTD_dct_rawContent) return 0;
96*01826a49SYabin Cui 
97*01826a49SYabin Cui     if (ddict->dictSize < 8) {
98*01826a49SYabin Cui         if (dictContentType == ZSTD_dct_fullDict)
99*01826a49SYabin Cui             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
100*01826a49SYabin Cui         return 0;   /* pure content mode */
101*01826a49SYabin Cui     }
102*01826a49SYabin Cui     {   U32 const magic = MEM_readLE32(ddict->dictContent);
103*01826a49SYabin Cui         if (magic != ZSTD_MAGIC_DICTIONARY) {
104*01826a49SYabin Cui             if (dictContentType == ZSTD_dct_fullDict)
105*01826a49SYabin Cui                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
106*01826a49SYabin Cui             return 0;   /* pure content mode */
107*01826a49SYabin Cui         }
108*01826a49SYabin Cui     }
109*01826a49SYabin Cui     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
110*01826a49SYabin Cui 
111*01826a49SYabin Cui     /* load entropy tables */
112*01826a49SYabin Cui     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
113*01826a49SYabin Cui             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
114*01826a49SYabin Cui         dictionary_corrupted, "");
115*01826a49SYabin Cui     ddict->entropyPresent = 1;
116*01826a49SYabin Cui     return 0;
117*01826a49SYabin Cui }
118*01826a49SYabin Cui 
119*01826a49SYabin Cui 
/*! ZSTD_initDDict_internal() :
 *  Attaches dictionary content to `ddict`, then parses it.
 *  `ddict->cMem` must already be set when a copy may be required.
 *  The content is duplicated only when loading by copy with a non-NULL,
 *  non-empty `dict`; in every other case it is merely referenced.
 *  `ddict->dictBuffer` is always assigned before returning, including on error,
 *  so the caller can release `ddict` afterwards.
 *  @return : 0 on success, or an error code
 *            (memory_allocation, or the one reported by the parsing stage). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                      && (dict != NULL)
                      && (dictSize != 0);

    if (mustCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;   /* no content : size is meaningless */
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
144*01826a49SYabin Cui 
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)145*01826a49SYabin Cui ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
146*01826a49SYabin Cui                                       ZSTD_dictLoadMethod_e dictLoadMethod,
147*01826a49SYabin Cui                                       ZSTD_dictContentType_e dictContentType,
148*01826a49SYabin Cui                                       ZSTD_customMem customMem)
149*01826a49SYabin Cui {
150*01826a49SYabin Cui     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
151*01826a49SYabin Cui 
152*01826a49SYabin Cui     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
153*01826a49SYabin Cui         if (ddict == NULL) return NULL;
154*01826a49SYabin Cui         ddict->cMem = customMem;
155*01826a49SYabin Cui         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
156*01826a49SYabin Cui                                             dict, dictSize,
157*01826a49SYabin Cui                                             dictLoadMethod, dictContentType);
158*01826a49SYabin Cui             if (ZSTD_isError(initResult)) {
159*01826a49SYabin Cui                 ZSTD_freeDDict(ddict);
160*01826a49SYabin Cui                 return NULL;
161*01826a49SYabin Cui         }   }
162*01826a49SYabin Cui         return ddict;
163*01826a49SYabin Cui     }
164*01826a49SYabin Cui }
165*01826a49SYabin Cui 
166*01826a49SYabin Cui /*! ZSTD_createDDict() :
167*01826a49SYabin Cui *   Create a digested dictionary, to start decompression without startup delay.
168*01826a49SYabin Cui *   `dict` content is copied inside DDict.
169*01826a49SYabin Cui *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)170*01826a49SYabin Cui ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
171*01826a49SYabin Cui {
172*01826a49SYabin Cui     ZSTD_customMem const allocator = { NULL, NULL, NULL };
173*01826a49SYabin Cui     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
174*01826a49SYabin Cui }
175*01826a49SYabin Cui 
176*01826a49SYabin Cui /*! ZSTD_createDDict_byReference() :
177*01826a49SYabin Cui  *  Create a digested dictionary, to start decompression without startup delay.
178*01826a49SYabin Cui  *  Dictionary content is simply referenced, it will be accessed during decompression.
179*01826a49SYabin Cui  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)180*01826a49SYabin Cui ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
181*01826a49SYabin Cui {
182*01826a49SYabin Cui     ZSTD_customMem const allocator = { NULL, NULL, NULL };
183*01826a49SYabin Cui     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
184*01826a49SYabin Cui }
185*01826a49SYabin Cui 
186*01826a49SYabin Cui 
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)187*01826a49SYabin Cui const ZSTD_DDict* ZSTD_initStaticDDict(
188*01826a49SYabin Cui                                 void* sBuffer, size_t sBufferSize,
189*01826a49SYabin Cui                                 const void* dict, size_t dictSize,
190*01826a49SYabin Cui                                 ZSTD_dictLoadMethod_e dictLoadMethod,
191*01826a49SYabin Cui                                 ZSTD_dictContentType_e dictContentType)
192*01826a49SYabin Cui {
193*01826a49SYabin Cui     size_t const neededSpace = sizeof(ZSTD_DDict)
194*01826a49SYabin Cui                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
195*01826a49SYabin Cui     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
196*01826a49SYabin Cui     assert(sBuffer != NULL);
197*01826a49SYabin Cui     assert(dict != NULL);
198*01826a49SYabin Cui     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
199*01826a49SYabin Cui     if (sBufferSize < neededSpace) return NULL;
200*01826a49SYabin Cui     if (dictLoadMethod == ZSTD_dlm_byCopy) {
201*01826a49SYabin Cui         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
202*01826a49SYabin Cui         dict = ddict+1;
203*01826a49SYabin Cui     }
204*01826a49SYabin Cui     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
205*01826a49SYabin Cui                                               dict, dictSize,
206*01826a49SYabin Cui                                               ZSTD_dlm_byRef, dictContentType) ))
207*01826a49SYabin Cui         return NULL;
208*01826a49SYabin Cui     return ddict;
209*01826a49SYabin Cui }
210*01826a49SYabin Cui 
211*01826a49SYabin Cui 
/*! ZSTD_freeDDict() :
 *  Releases `ddict` and the internal dictionary copy it may own,
 *  using the allocator recorded in `ddict`.
 *  Accepts NULL, in which case nothing happens.
 *  @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* keep a copy of the allocator : it is stored inside the object being freed */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_customFree(ddict->dictBuffer, allocator);
        ZSTD_customFree(ddict, allocator);
    }
    return 0;
}
221*01826a49SYabin Cui 
222*01826a49SYabin Cui /*! ZSTD_estimateDDictSize() :
223*01826a49SYabin Cui  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
224*01826a49SYabin Cui  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)225*01826a49SYabin Cui size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
226*01826a49SYabin Cui {
227*01826a49SYabin Cui     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
228*01826a49SYabin Cui }
229*01826a49SYabin Cui 
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)230*01826a49SYabin Cui size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
231*01826a49SYabin Cui {
232*01826a49SYabin Cui     if (ddict==NULL) return 0;   /* support sizeof on NULL */
233*01826a49SYabin Cui     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
234*01826a49SYabin Cui }
235*01826a49SYabin Cui 
236*01826a49SYabin Cui /*! ZSTD_getDictID_fromDDict() :
237*01826a49SYabin Cui  *  Provides the dictID of the dictionary loaded into `ddict`.
238*01826a49SYabin Cui  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
239*01826a49SYabin Cui  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)240*01826a49SYabin Cui unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
241*01826a49SYabin Cui {
242*01826a49SYabin Cui     if (ddict==NULL) return 0;
243*01826a49SYabin Cui     return ddict->dictID;
244*01826a49SYabin Cui }
245