xref: /aosp_15_r20/external/lzma/C/Xz.h (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 /* Xz.h - Xz interface
2 2024-01-26 : Igor Pavlov : Public domain */
3 
4 #ifndef ZIP7_INC_XZ_H
5 #define ZIP7_INC_XZ_H
6 
7 #include "Sha256.h"
8 #include "Delta.h"
9 
10 EXTERN_C_BEGIN
11 
12 #define XZ_ID_Subblock 1   // XZ_ID_*: numeric method IDs; presumably the values carried in CXzFilter::id - TODO confirm
13 #define XZ_ID_Delta 3
14 #define XZ_ID_X86   4
15 #define XZ_ID_PPC   5
16 #define XZ_ID_IA64  6
17 #define XZ_ID_ARM   7
18 #define XZ_ID_ARMT  8
19 #define XZ_ID_SPARC 9
20 #define XZ_ID_ARM64 0xa
21 #define XZ_ID_RISCV 0xb
22 #define XZ_ID_LZMA2 0x21
23 
24 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);
25 unsigned Xz_WriteVarInt(Byte *buf, UInt64 v);
26 
27 /* ---------- xz block ---------- */
28 
29 #define XZ_BLOCK_HEADER_SIZE_MAX 1024
30 
31 #define XZ_NUM_FILTERS_MAX 4
32 #define XZ_BF_NUM_FILTERS_MASK 3
33 #define XZ_BF_PACK_SIZE (1 << 6)
34 #define XZ_BF_UNPACK_SIZE (1 << 7)
35 
36 #define XZ_FILTER_PROPS_SIZE_MAX 20
37 
38 typedef struct  // one filter entry of an xz block; CXzBlock::filters is an array of these
39 {
40   UInt64 id;         // filter identifier; presumably one of the XZ_ID_* values - TODO confirm
41   UInt32 propsSize;  // presumably the number of valid bytes in props[] - TODO confirm
42   Byte props[XZ_FILTER_PROPS_SIZE_MAX];  // filter properties buffer; capacity is XZ_FILTER_PROPS_SIZE_MAX (20) bytes
43 } CXzFilter;
44 
45 typedef struct  // xz block header fields; XzBlock_Parse() and XzBlock_ReadHeader() take a pointer to this struct
46 {
47   UInt64 packSize;    // NOTE(review): presumably meaningful only when XzBlock_HasPackSize(p) is true - confirm
48   UInt64 unpackSize;  // NOTE(review): presumably meaningful only when XzBlock_HasUnpackSize(p) is true - confirm
49   Byte flags;         // bits 0-1: XZ_BF_NUM_FILTERS_MASK (filter count - 1); bit 6: XZ_BF_PACK_SIZE; bit 7: XZ_BF_UNPACK_SIZE
50   CXzFilter filters[XZ_NUM_FILTERS_MAX];  // room for XZ_NUM_FILTERS_MAX (4) filters; XzBlock_GetNumFilters(p) yields 1..4
51 } CXzBlock;
52 
53 #define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)  // low two flag bits plus one, so the result is 1..4
54 #define XzBlock_HasPackSize(p)   (((p)->flags & XZ_BF_PACK_SIZE) != 0)  // tests flag bit 6 (1 << 6)
55 #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)  // tests flag bit 7 (1 << 7)
56 #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)  // true when any flag bit outside the three known fields is set
57 
58 SRes XzBlock_Parse(CXzBlock *p, const Byte *header);
59 SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes);
60 
61 /* ---------- xz stream ---------- */
62 
63 #define XZ_SIG_SIZE 6
64 #define XZ_FOOTER_SIG_SIZE 2
65 
66 extern const Byte XZ_SIG[XZ_SIG_SIZE];
67 
68 /*
69 extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE];
70 */
71 
72 #define XZ_FOOTER_SIG_0 'Y'
73 #define XZ_FOOTER_SIG_1 'Z'
74 
75 #define XZ_STREAM_FLAGS_SIZE 2
76 #define XZ_STREAM_CRC_SIZE 4
77 
78 #define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE)  // 6 + 2 + 4 = 12 bytes
79 #define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4)  // 2 + 2 + 4 + 4 = 12 bytes; NOTE(review): the literal 4 is presumably the footer's Backward Size field - confirm against the xz format spec
80 
81 #define XZ_CHECK_MASK 0xF
82 #define XZ_CHECK_NO 0
83 #define XZ_CHECK_CRC32 1
84 #define XZ_CHECK_CRC64 4
85 #define XZ_CHECK_SHA256 10
86 
87 typedef struct  // integrity-check state operated on by XzCheck_Init() / XzCheck_Update() / XzCheck_Final()
88 {
89   unsigned mode;  // check type; presumably one of the XZ_CHECK_* values passed to XzCheck_Init() - TODO confirm
90   UInt32 crc;     // presumably the running CRC32 value (XZ_CHECK_CRC32) - TODO confirm
91   UInt64 crc64;   // presumably the running CRC64 value (XZ_CHECK_CRC64) - TODO confirm
92   CSha256 sha;    // SHA-256 context (type from Sha256.h); presumably used for XZ_CHECK_SHA256 - TODO confirm
93 } CXzCheck;
94 
95 void XzCheck_Init(CXzCheck *p, unsigned mode);
96 void XzCheck_Update(CXzCheck *p, const void *data, size_t size);
97 int XzCheck_Final(CXzCheck *p, Byte *digest);
98 
99 typedef UInt16 CXzStreamFlags;
100 
101 #define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK)  // true when no bit above the XZ_CHECK_MASK nibble is set
102 #define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK)  // low 4 bits of the stream flags; compared against XZ_CHECK_* values
103 #define XzFlags_HasDataCrc32(f) (XzFlags_GetCheckType(f) == XZ_CHECK_CRC32)  // true when the check type is CRC32; must go through XzFlags_GetCheckType (no Xz_GetCheckType is declared in this header)
104 unsigned XzFlags_GetCheckSize(CXzStreamFlags f);
105 
106 SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf);
107 SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream);
108 
109 typedef struct  // size pair for one block; CXzStream::blocks points to elements of this type
110 {
111   UInt64 unpackSize;
112   UInt64 totalSize;  // NOTE(review): presumably the full stored size of the block (header + packed data + check) - confirm
113 } CXzBlockSizes;
114 
115 typedef struct  // per-stream record; handled by Xz_Construct(), Xz_Free(), Xz_GetUnpackSize() and Xz_GetPackSize()
116 {
117   CXzStreamFlags flags;
118   // Byte _pad[6];
119   size_t numBlocks;       // presumably the number of elements in blocks[] - TODO confirm
120   CXzBlockSizes *blocks;  // NOTE(review): presumably allocated with an ISzAllocPtr and released by Xz_Free(p, alloc) - confirm
121   UInt64 startOffset;
122 } CXzStream;
123 
124 void Xz_Construct(CXzStream *p);
125 void Xz_Free(CXzStream *p, ISzAllocPtr alloc);
126 
127 #define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1)  // all-ones UInt64; NOTE(review): presumably the overflow sentinel returned by the size getters declared in this header - confirm
128 
129 UInt64 Xz_GetUnpackSize(const CXzStream *p);
130 UInt64 Xz_GetPackSize(const CXzStream *p);
131 
132 typedef struct  // collection of CXzStream records; handled by Xzs_Construct(), Xzs_Free() and Xzs_ReadBackward()
133 {
134   size_t num;           // presumably the number of used entries in streams[] - TODO confirm
135   size_t numAllocated;  // presumably the allocated capacity of streams[] - TODO confirm
136   CXzStream *streams;
137 } CXzs;
138 
139 void Xzs_Construct(CXzs *p);
140 void Xzs_Free(CXzs *p, ISzAllocPtr alloc);
141 SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc);
142 
143 UInt64 Xzs_GetNumBlocks(const CXzs *p);
144 UInt64 Xzs_GetUnpackSize(const CXzs *p);
145 
146 
147 // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder
148 
149 typedef enum
150 {
151   CODER_STATUS_NOT_SPECIFIED,               /* use main error code instead */
152   CODER_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
153   CODER_STATUS_NOT_FINISHED,                /* stream was not finished */
154   CODER_STATUS_NEEDS_MORE_INPUT             /* you must provide more input bytes */
155 } ECoderStatus;
156 
157 
158 // ECoderFinishMode values are identical to ELzmaFinishMode
159 
160 typedef enum
161 {
162   CODER_FINISH_ANY,   /* finish at any point */
163   CODER_FINISH_END    /* block must be finished at the end */
164 } ECoderFinishMode;
165 
166 
167 typedef struct  // callback table plus state pointer for one coder; CMixCoder::coders is an array of these
168 {
169   void *p; // state object; presumably the value handed back as the first argument of each callback below - TODO confirm
170   void (*Free)(void *p, ISzAllocPtr alloc);
171   SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc);
172   void (*Init)(void *p);
173   SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
174       int srcWasFinished, ECoderFinishMode finishMode,
175       // int *wasFinished,
176       ECoderStatus *status);
177   SizeT (*Filter)(void *p, Byte *data, SizeT size);  // same parameter shape as Xz_Func_BcFilterStateBase_Filter, but with an untyped state pointer
178 } IStateCoder;
179 
180 
181 typedef struct  // state passed to Xz_Func_BcFilterStateBase_Filter callbacks
182 {
183   UInt32 methodId;   // presumably one of the XZ_ID_* values - TODO confirm
184   UInt32 delta;
185   UInt32 ip;
186   UInt32 X86_State;
187   Byte delta_State[DELTA_STATE_SIZE];  // DELTA_STATE_SIZE is not defined in this header; presumably it comes from Delta.h
188 } CXzBcFilterStateBase;
189 
190 typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size);
191 
192 SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
193     Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc);
194 
195 
196 #define MIXCODER_NUM_FILTERS_MAX 4
197 
198 typedef struct  // chain of up to MIXCODER_NUM_FILTERS_MAX (4) coders; embedded in CXzUnpacker as the (decoder) member
199 {
200   ISzAllocPtr alloc;
201   Byte *buf;
202   unsigned numCoders;  // presumably the number of used entries in coders[] / ids[] / results[] - TODO confirm
203 
204   Byte *outBuf;
205   size_t outBufSize;
206   size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode)
207   BoolInt wasFinished;
208   SRes res;
209   ECoderStatus status;
210   // BoolInt SingleBufMode;
211 
212   int finished[MIXCODER_NUM_FILTERS_MAX - 1];  // the three arrays sized (MAX - 1) have 3 entries; NOTE(review): presumably one per link between adjacent coders - confirm
213   size_t pos[MIXCODER_NUM_FILTERS_MAX - 1];
214   size_t size[MIXCODER_NUM_FILTERS_MAX - 1];
215   UInt64 ids[MIXCODER_NUM_FILTERS_MAX];        // the three arrays sized MAX have 4 entries, one per coder slot
216   SRes results[MIXCODER_NUM_FILTERS_MAX];
217   IStateCoder coders[MIXCODER_NUM_FILTERS_MAX];
218 } CMixCoder;
219 
220 
221 typedef enum  // values of CXzUnpacker::state
222 {
223   XZ_STATE_STREAM_HEADER,     // value 0; one of the two states in which XzUnpacker_GetExtraSize() reports unconfirmed bytes
224   XZ_STATE_STREAM_INDEX,
225   XZ_STATE_STREAM_INDEX_CRC,
226   XZ_STATE_STREAM_FOOTER,
227   XZ_STATE_STREAM_PADDING,    // the other state in which XzUnpacker_GetExtraSize() reports unconfirmed bytes
228   XZ_STATE_BLOCK_HEADER,
229   XZ_STATE_BLOCK,
230   XZ_STATE_BLOCK_FOOTER
231 } EXzState;
232 
233 
234 typedef struct  // xz decoder object used by the XzUnpacker_*() functions declared in this header
235 {
236   EXzState state;  // current parser state
237   unsigned pos;
238   unsigned alignPos;
239   unsigned indexPreSize;
240 
241   CXzStreamFlags streamFlags;  // for random block decoding the caller sets this after XzUnpacker_Init() (see the usage notes in this header)
242 
243   unsigned blockHeaderSize;  // read by XzUnpacker_GetPackSizeForIndex()
244   UInt64 packSize;           // read by XzUnpacker_GetPackSizeForIndex()
245   UInt64 unpackSize;
246 
247   UInt64 numBlocks; // number of finished blocks in current stream
248   UInt64 indexSize;
249   UInt64 indexPos;
250   UInt64 padSize;
251 
252   UInt64 numStartedStreams;
253   UInt64 numFinishedStreams;
254   UInt64 numTotalBlocks;
255 
256   UInt32 crc;
257   CMixCoder decoder;
258   CXzBlock block;
259   CXzCheck check;
260   CSha256 sha;
261 
262   BoolInt parseMode;
263   BoolInt headerParsedOk;
264   BoolInt decodeToStreamSignature;
265   unsigned decodeOnlyOneBlock;
266 
267   Byte *outBuf;       // NOTE(review): presumably set through XzUnpacker_SetOutBuf(p, outBuf, outBufSize) - confirm
268   size_t outBufSize;
269   size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked
270 
271   Byte shaDigest[SHA256_DIGEST_SIZE];
272   Byte buf[XZ_BLOCK_HEADER_SIZE_MAX];  // XZ_BLOCK_HEADER_SIZE_MAX (1024) bytes; NOTE(review): presumably scratch space for header bytes - confirm
273 } CXzUnpacker;
274 
275 /* alloc : aligned for cache line allocation is better */
276 void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc);
277 void XzUnpacker_Init(CXzUnpacker *p);
278 void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize);
279 void XzUnpacker_Free(CXzUnpacker *p);
280 
281 /*
282   XzUnpacker
283   The sequence for decoding functions:
284   {
285     XzUnpacker_Construct()
286     [Decoding_Calls]
287     XzUnpacker_Free()
288   }
289 
290   [Decoding_Calls]
291 
292   There are 3 types of interfaces for [Decoding_Calls] calls:
293 
294   Interface-1 : Partial output buffers:
295     {
296       XzUnpacker_Init()
297       for()
298       {
299         XzUnpacker_Code();
300       }
301       XzUnpacker_IsStreamWasFinished()
302     }
303 
304   Interface-2 : Direct output buffer:
305     Use it, if you know exact size of decoded data, and you need
306     whole xz unpacked data in one output buffer.
307     xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode.
308     {
309       XzUnpacker_Init()
310       XzUnpacker_SetOutBuf(); // to set output buffer and size
311       for()
312       {
313         XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
314       }
315       XzUnpacker_IsStreamWasFinished()
316     }
317 
318   Interface-3 : Direct output buffer : One call full decoding
319     It unpacks whole input buffer to output buffer in one call.
320     It uses Interface-2 internally.
321     {
322       XzUnpacker_CodeFull()
323       XzUnpacker_IsStreamWasFinished()
324     }
325 */
326 
327 /*
328 finishMode:
329   It has meaning only if the decoding reaches output limit (*destLen).
330   CODER_FINISH_ANY - use smallest number of input bytes
331   CODER_FINISH_END - read EndOfStream marker after decoding
332 
333 Returns:
334   SZ_OK
335     status:
336       CODER_STATUS_NOT_FINISHED,
337       CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
338          1) it needs more input data to finish current xz stream
339          2) xz stream was finished successfully. But the decoder supports multiple
340             concatenated xz streams. So it expects more input data for new xz streams.
341          Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
342 
343   SZ_ERROR_MEM  - Memory allocation error
344   SZ_ERROR_DATA - Data error
345   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
346   SZ_ERROR_CRC  - CRC error
347   // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
348 
349   SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons:
350      - xz Stream Signature failure
351      - CRC32 of xz Stream Header is failed
352      - The size of Stream padding is not multiple of four bytes.
353     It's possible to get that error, if xz stream was finished and the stream
354     contains some other data. In that case you can call XzUnpacker_GetExtraSize()
355     function to get real size of xz stream.
356 */
357 
358 
359 SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
360     const Byte *src, SizeT *srcLen, int srcFinished,
361     ECoderFinishMode finishMode, ECoderStatus *status);
362 
363 SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
364     const Byte *src, SizeT *srcLen,
365     ECoderFinishMode finishMode, ECoderStatus *status);
366 
367 /*
368 If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
369 after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
370 */
371 
372 BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
373 
374 /*
375 XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes,
376  if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
377 These bytes can be some data after xz archive, or
378 it can be start of new xz stream.
379 
380 Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
381 xz stream in two cases, if XzUnpacker_Code() returns:
382   res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT
383   res == SZ_ERROR_NO_ARCHIVE
384 */
385 
386 UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p);
387 
388 
389 /*
390   for random block decoding:
391     XzUnpacker_Init();
392     set CXzUnpacker::streamFlags
393     XzUnpacker_PrepareToRandomBlockDecoding()
394     loop
395     {
396       XzUnpacker_Code()
397       XzUnpacker_IsBlockFinished()
398     }
399 */
400 
401 void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p);
402 BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
403 
404 #define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags))  // packed size + block header size + check size; (p) is evaluated three times, so pass an expression without side effects
405 
406 
407 
408 
409 
410 
411 /* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
412 
413 /*
414   if (CXzDecMtProps::numThreads > 1), the decoder can try to use
415   Multi-Threading. The decoder analyses xz block header, and if
416   there are pack size and unpack size values stored in xz block header,
417   the decoder reads compressed data of block to internal buffers,
418   and then it can start parallel decoding, if there are other blocks.
419   The decoder can switch back to Single-Thread decoding after some conditions.
420 
421   The sequence of calls for xz decoding with in/out Streams:
422   {
423     XzDecMt_Create()
424     XzDecMtProps_Init(XzDecMtProps) to set default values of properties
425     // then you can change some XzDecMtProps parameters with required values
426     // here you can set the number of threads and (memUseMax) - the maximum
427     Memory usage for multithreading decoding.
428     for()
429     {
430       XzDecMt_Decode() // one call per one file
431     }
432     XzDecMt_Destroy()
433   }
434 */
435 
436 
437 typedef struct
438 {
439   size_t inBufSize_ST;    // size of input buffer for Single-Thread decoding
440   size_t outStep_ST;      // size of output buffer for Single-Thread decoding
441   BoolInt ignoreErrors;   // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
442 
443   #ifndef Z7_ST
444   unsigned numThreads;    // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-Thread decoding
445   size_t inBufSize_MT;    // size of small input data buffers for Multi-Thread decoding. A large number of such small buffers can be created
446   size_t memUseMax;       // the limit of total memory usage for Multi-Thread decoding.
447                           // it's recommended to set (memUseMax) manually to a value that is smaller than the total size of RAM in the computer.
448   #endif
449 } CXzDecMtProps;
450 
451 void XzDecMtProps_Init(CXzDecMtProps *p);
452 
453 typedef struct CXzDecMt CXzDecMt;
454 typedef CXzDecMt * CXzDecMtHandle;
455 // Z7_DECLARE_HANDLE(CXzDecMtHandle)
456 
457 /*
458   alloc    : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
459   allocMid : for big allocations, aligned allocation is better
460 */
461 
462 CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);
463 void XzDecMt_Destroy(CXzDecMtHandle p);
464 
465 
466 typedef struct
467 {
468   Byte UnpackSize_Defined;
469   Byte NumStreams_Defined;
470   Byte NumBlocks_Defined;
471 
472   Byte DataAfterEnd;      // there is some additional data after good xz streams, and that data is not a new xz stream.
473   Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
474 
475   UInt64 InSize;          // pack size processed. That value doesn't include the data after
476                           // end of xz stream, if that data was not correct
477   UInt64 OutSize;
478 
479   UInt64 NumStreams;
480   UInt64 NumBlocks;
481 
482   SRes DecodeRes;         // the error code of xz streams data decoding
483   SRes ReadRes;           // error code from ISeqInStream:Read()
484   SRes ProgressRes;       // error code from ICompressProgress:Progress()
485 
486   SRes CombinedRes;       // Combined result error code that shows the main result
487                           // = SZ_OK, if there is no error.
488                           // but check also (DataAfterEnd) that can show additional minor errors.
489 
490   SRes CombinedRes_Type;  // = SZ_ERROR_READ,     if error from ISeqInStream
491                           // = SZ_ERROR_PROGRESS, if error from ICompressProgress
492                           // = SZ_ERROR_WRITE,    if error from ISeqOutStream
493                           // = SZ_ERROR_* codes for decoding
494 } CXzStatInfo;
495 
496 void XzStatInfo_Clear(CXzStatInfo *p);
497 
498 /*
499 
500 XzDecMt_Decode()
501 SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
502 
503   SZ_OK               - no error
504                         check also output value in (stat->DataAfterEnd)
505                         that can show additional possible error
506 
507   SZ_ERROR_MEM        - Memory allocation error
508   SZ_ERROR_NO_ARCHIVE - is not xz archive
509   SZ_ERROR_ARCHIVE    - Headers error
510   SZ_ERROR_DATA       - Data Error
511   SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
512   SZ_ERROR_CRC        - CRC Error
513   SZ_ERROR_INPUT_EOF  - it needs more input data
514   SZ_ERROR_WRITE      - ISeqOutStream error
515   (SZ_ERROR_READ)     - ISeqInStream errors
516   (SZ_ERROR_PROGRESS) - ICompressProgress errors
517   // SZ_ERROR_THREAD     - error in multi-threading functions
518   MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function
519 */
520 
521 SRes XzDecMt_Decode(CXzDecMtHandle p,
522     const CXzDecMtProps *props,
523     const UInt64 *outDataSize, // NULL means undefined
524     int finishMode,            // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished
525     ISeqOutStreamPtr outStream,
526     // Byte *outBuf, size_t *outBufSize,
527     ISeqInStreamPtr inStream,
528     // const Byte *inData, size_t inDataSize,
529     CXzStatInfo *stat,         // out: decoding results and statistics
530     int *isMT,                 // out: 0 means that ST (Single-Thread) version was used
531                                //      1 means that MT (Multi-Thread) version was used
532     ICompressProgressPtr progress);
533 
534 EXTERN_C_END
535 
536 #endif
537