1 /* Xz.h - Xz interface 2 2024-01-26 : Igor Pavlov : Public domain */ 3 4 #ifndef ZIP7_INC_XZ_H 5 #define ZIP7_INC_XZ_H 6 7 #include "Sha256.h" 8 #include "Delta.h" 9 10 EXTERN_C_BEGIN 11 12 #define XZ_ID_Subblock 1 13 #define XZ_ID_Delta 3 14 #define XZ_ID_X86 4 15 #define XZ_ID_PPC 5 16 #define XZ_ID_IA64 6 17 #define XZ_ID_ARM 7 18 #define XZ_ID_ARMT 8 19 #define XZ_ID_SPARC 9 20 #define XZ_ID_ARM64 0xa 21 #define XZ_ID_RISCV 0xb 22 #define XZ_ID_LZMA2 0x21 23 24 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); 25 unsigned Xz_WriteVarInt(Byte *buf, UInt64 v); 26 27 /* ---------- xz block ---------- */ 28 29 #define XZ_BLOCK_HEADER_SIZE_MAX 1024 30 31 #define XZ_NUM_FILTERS_MAX 4 32 #define XZ_BF_NUM_FILTERS_MASK 3 33 #define XZ_BF_PACK_SIZE (1 << 6) 34 #define XZ_BF_UNPACK_SIZE (1 << 7) 35 36 #define XZ_FILTER_PROPS_SIZE_MAX 20 37 38 typedef struct 39 { 40 UInt64 id; 41 UInt32 propsSize; 42 Byte props[XZ_FILTER_PROPS_SIZE_MAX]; 43 } CXzFilter; 44 45 typedef struct 46 { 47 UInt64 packSize; 48 UInt64 unpackSize; 49 Byte flags; 50 CXzFilter filters[XZ_NUM_FILTERS_MAX]; 51 } CXzBlock; 52 53 #define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) 54 #define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0) 55 #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) 56 #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) 57 58 SRes XzBlock_Parse(CXzBlock *p, const Byte *header); 59 SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes); 60 61 /* ---------- xz stream ---------- */ 62 63 #define XZ_SIG_SIZE 6 64 #define XZ_FOOTER_SIG_SIZE 2 65 66 extern const Byte XZ_SIG[XZ_SIG_SIZE]; 67 68 /* 69 extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE]; 70 */ 71 72 #define XZ_FOOTER_SIG_0 'Y' 73 #define XZ_FOOTER_SIG_1 'Z' 74 75 #define XZ_STREAM_FLAGS_SIZE 2 76 #define XZ_STREAM_CRC_SIZE 4 77 78 #define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE) 79 #define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4) 80 81 #define XZ_CHECK_MASK 0xF 82 #define XZ_CHECK_NO 0 83 #define XZ_CHECK_CRC32 1 84 #define XZ_CHECK_CRC64 4 85 #define XZ_CHECK_SHA256 10 86 87 typedef struct 88 { 89 unsigned mode; 90 UInt32 crc; 91 UInt64 crc64; 92 CSha256 sha; 93 } CXzCheck; 94 95 void XzCheck_Init(CXzCheck *p, unsigned mode); 96 void XzCheck_Update(CXzCheck *p, const void *data, size_t size); 97 int XzCheck_Final(CXzCheck *p, Byte *digest); 98 99 typedef UInt16 CXzStreamFlags; 100 101 #define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK) 102 #define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK) 103 #define XzFlags_HasDataCrc32(f) (Xz_GetCheckType(f) == XZ_CHECK_CRC32) 104 unsigned XzFlags_GetCheckSize(CXzStreamFlags f); 105 106 SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); 107 SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream); 108 109 typedef struct 110 { 111 UInt64 unpackSize; 112 UInt64 totalSize; 113 } CXzBlockSizes; 114 115 typedef struct 116 { 117 CXzStreamFlags flags; 118 // Byte _pad[6]; 119 size_t numBlocks; 120 CXzBlockSizes *blocks; 121 UInt64 startOffset; 122 } CXzStream; 123 124 void Xz_Construct(CXzStream *p); 125 void Xz_Free(CXzStream *p, ISzAllocPtr alloc); 126 127 #define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1) 128 129 UInt64 Xz_GetUnpackSize(const CXzStream *p); 130 UInt64 Xz_GetPackSize(const CXzStream *p); 131 132 typedef struct 133 { 134 size_t num; 135 size_t numAllocated; 136 CXzStream *streams; 137 } CXzs; 138 139 void Xzs_Construct(CXzs *p); 140 void Xzs_Free(CXzs *p, ISzAllocPtr alloc); 141 SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); 142 143 UInt64 Xzs_GetNumBlocks(const CXzs *p); 144 UInt64 Xzs_GetUnpackSize(const CXzs *p); 145 146 147 // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder 148 149 typedef enum 150 { 151 CODER_STATUS_NOT_SPECIFIED, /* use main error code instead */ 152 CODER_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ 153 CODER_STATUS_NOT_FINISHED, /* stream was not finished */ 154 CODER_STATUS_NEEDS_MORE_INPUT /* you must provide more input bytes */ 155 } ECoderStatus; 156 157 158 // ECoderFinishMode values are identical to ELzmaFinishMode 159 160 typedef enum 161 { 162 CODER_FINISH_ANY, /* finish at any point */ 163 CODER_FINISH_END /* block must be finished at the end */ 164 } ECoderFinishMode; 165 166 167 typedef struct 168 { 169 void *p; // state object; 170 void (*Free)(void *p, ISzAllocPtr alloc); 171 SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc); 172 void (*Init)(void *p); 173 SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, 174 int srcWasFinished, ECoderFinishMode finishMode, 175 // int *wasFinished, 176 ECoderStatus *status); 177 SizeT (*Filter)(void *p, Byte *data, SizeT size); 178 } IStateCoder; 179 180 181 typedef struct 182 { 183 UInt32 methodId; 184 UInt32 delta; 185 UInt32 ip; 186 UInt32 X86_State; 187 Byte delta_State[DELTA_STATE_SIZE]; 188 } CXzBcFilterStateBase; 189 190 typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size); 191 192 SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, 193 Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc); 194 195 196 #define MIXCODER_NUM_FILTERS_MAX 4 197 198 typedef struct 199 { 200 ISzAllocPtr alloc; 201 Byte *buf; 202 unsigned numCoders; 203 204 Byte *outBuf; 205 size_t outBufSize; 206 size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode) 207 BoolInt wasFinished; 208 SRes res; 209 ECoderStatus status; 210 // BoolInt SingleBufMode; 211 212 int finished[MIXCODER_NUM_FILTERS_MAX - 1]; 213 size_t pos[MIXCODER_NUM_FILTERS_MAX - 1]; 214 size_t size[MIXCODER_NUM_FILTERS_MAX - 1]; 215 UInt64 ids[MIXCODER_NUM_FILTERS_MAX]; 216 SRes results[MIXCODER_NUM_FILTERS_MAX]; 217 IStateCoder coders[MIXCODER_NUM_FILTERS_MAX]; 218 } CMixCoder; 219 220 221 typedef enum 222 { 223 XZ_STATE_STREAM_HEADER, 224 XZ_STATE_STREAM_INDEX, 225 XZ_STATE_STREAM_INDEX_CRC, 226 XZ_STATE_STREAM_FOOTER, 227 XZ_STATE_STREAM_PADDING, 228 XZ_STATE_BLOCK_HEADER, 229 XZ_STATE_BLOCK, 230 XZ_STATE_BLOCK_FOOTER 231 } EXzState; 232 233 234 typedef struct 235 { 236 EXzState state; 237 unsigned pos; 238 unsigned alignPos; 239 unsigned indexPreSize; 240 241 CXzStreamFlags streamFlags; 242 243 unsigned blockHeaderSize; 244 UInt64 packSize; 245 UInt64 unpackSize; 246 247 UInt64 numBlocks; // number of finished blocks in current stream 248 UInt64 indexSize; 249 UInt64 indexPos; 250 UInt64 padSize; 251 252 UInt64 numStartedStreams; 253 UInt64 numFinishedStreams; 254 UInt64 numTotalBlocks; 255 256 UInt32 crc; 257 CMixCoder decoder; 258 CXzBlock block; 259 CXzCheck check; 260 CSha256 sha; 261 262 BoolInt parseMode; 263 BoolInt headerParsedOk; 264 BoolInt decodeToStreamSignature; 265 unsigned decodeOnlyOneBlock; 266 267 Byte *outBuf; 268 size_t outBufSize; 269 size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked 270 271 Byte shaDigest[SHA256_DIGEST_SIZE]; 272 Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; 273 } CXzUnpacker; 274 275 /* alloc : aligned for cache line allocation is better */ 276 void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc); 277 void XzUnpacker_Init(CXzUnpacker *p); 278 void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize); 279 void XzUnpacker_Free(CXzUnpacker *p); 280 281 /* 282 XzUnpacker 283 The sequence for decoding functions: 284 { 285 XzUnpacker_Construct() 286 [Decoding_Calls] 287 XzUnpacker_Free() 288 } 289 290 [Decoding_Calls] 291 292 There are 3 types of interfaces for [Decoding_Calls] calls: 293 294 Interface-1 : Partial output buffers: 295 { 296 XzUnpacker_Init() 297 for() 298 { 299 XzUnpacker_Code(); 300 } 301 XzUnpacker_IsStreamWasFinished() 302 } 303 304 Interface-2 : Direct output buffer: 305 Use it, if you know exact size of decoded data, and you need 306 whole xz unpacked data in one output buffer. 307 xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode. 308 { 309 XzUnpacker_Init() 310 XzUnpacker_SetOutBufMode(); // to set output buffer and size 311 for() 312 { 313 XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code() 314 } 315 XzUnpacker_IsStreamWasFinished() 316 } 317 318 Interface-3 : Direct output buffer : One call full decoding 319 It unpacks whole input buffer to output buffer in one call. 320 It uses Interface-2 internally. 321 { 322 XzUnpacker_CodeFull() 323 XzUnpacker_IsStreamWasFinished() 324 } 325 */ 326 327 /* 328 finishMode: 329 It has meaning only if the decoding reaches output limit (*destLen). 330 CODER_FINISH_ANY - use smallest number of input bytes 331 CODER_FINISH_END - read EndOfStream marker after decoding 332 333 Returns: 334 SZ_OK 335 status: 336 CODER_STATUS_NOT_FINISHED, 337 CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases: 338 1) it needs more input data to finish current xz stream 339 2) xz stream was finished successfully. But the decoder supports multiple 340 concatented xz streams. So it expects more input data for new xz streams. 341 Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully. 342 343 SZ_ERROR_MEM - Memory allocation error 344 SZ_ERROR_DATA - Data error 345 SZ_ERROR_UNSUPPORTED - Unsupported method or method properties 346 SZ_ERROR_CRC - CRC error 347 // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 348 349 SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons: 350 - xz Stream Signature failure 351 - CRC32 of xz Stream Header is failed 352 - The size of Stream padding is not multiple of four bytes. 353 It's possible to get that error, if xz stream was finished and the stream 354 contains some another data. In that case you can call XzUnpacker_GetExtraSize() 355 function to get real size of xz stream. 356 */ 357 358 359 SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, 360 const Byte *src, SizeT *srcLen, int srcFinished, 361 ECoderFinishMode finishMode, ECoderStatus *status); 362 363 SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, 364 const Byte *src, SizeT *srcLen, 365 ECoderFinishMode finishMode, ECoderStatus *status); 366 367 /* 368 If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished() 369 after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code(). 370 */ 371 372 BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); 373 374 /* 375 XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes, 376 if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state. 377 These bytes can be some data after xz archive, or 378 it can be start of new xz stream. 379 380 Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of 381 xz stream in two cases, if XzUnpacker_Code() returns: 382 res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT 383 res == SZ_ERROR_NO_ARCHIVE 384 */ 385 386 UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p); 387 388 389 /* 390 for random block decoding: 391 XzUnpacker_Init(); 392 set CXzUnpacker::streamFlags 393 XzUnpacker_PrepareToRandomBlockDecoding() 394 loop 395 { 396 XzUnpacker_Code() 397 XzUnpacker_IsBlockFinished() 398 } 399 */ 400 401 void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p); 402 BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); 403 404 #define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags)) 405 406 407 408 409 410 411 /* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */ 412 413 /* 414 if (CXzDecMtProps::numThreads > 1), the decoder can try to use 415 Multi-Threading. The decoder analyses xz block header, and if 416 there are pack size and unpack size values stored in xz block header, 417 the decoder reads compressed data of block to internal buffers, 418 and then it can start parallel decoding, if there are another blocks. 419 The decoder can switch back to Single-Thread decoding after some conditions. 420 421 The sequence of calls for xz decoding with in/out Streams: 422 { 423 XzDecMt_Create() 424 XzDecMtProps_Init(XzDecMtProps) to set default values of properties 425 // then you can change some XzDecMtProps parameters with required values 426 // here you can set the number of threads and (memUseMax) - the maximum 427 Memory usage for multithreading decoding. 428 for() 429 { 430 XzDecMt_Decode() // one call per one file 431 } 432 XzDecMt_Destroy() 433 } 434 */ 435 436 437 typedef struct 438 { 439 size_t inBufSize_ST; // size of input buffer for Single-Thread decoding 440 size_t outStep_ST; // size of output buffer for Single-Thread decoding 441 BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. 442 443 #ifndef Z7_ST 444 unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding 445 size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created 446 size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. 447 // it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer. 448 #endif 449 } CXzDecMtProps; 450 451 void XzDecMtProps_Init(CXzDecMtProps *p); 452 453 typedef struct CXzDecMt CXzDecMt; 454 typedef CXzDecMt * CXzDecMtHandle; 455 // Z7_DECLARE_HANDLE(CXzDecMtHandle) 456 457 /* 458 alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). 459 allocMid : for big allocations, aligned allocation is better 460 */ 461 462 CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); 463 void XzDecMt_Destroy(CXzDecMtHandle p); 464 465 466 typedef struct 467 { 468 Byte UnpackSize_Defined; 469 Byte NumStreams_Defined; 470 Byte NumBlocks_Defined; 471 472 Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream. 473 Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data 474 475 UInt64 InSize; // pack size processed. That value doesn't include the data after 476 // end of xz stream, if that data was not correct 477 UInt64 OutSize; 478 479 UInt64 NumStreams; 480 UInt64 NumBlocks; 481 482 SRes DecodeRes; // the error code of xz streams data decoding 483 SRes ReadRes; // error code from ISeqInStream:Read() 484 SRes ProgressRes; // error code from ICompressProgress:Progress() 485 486 SRes CombinedRes; // Combined result error code that shows main rusult 487 // = S_OK, if there is no error. 488 // but check also (DataAfterEnd) that can show additional minor errors. 489 490 SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream 491 // = SZ_ERROR_PROGRESS, if error from ICompressProgress 492 // = SZ_ERROR_WRITE, if error from ISeqOutStream 493 // = SZ_ERROR_* codes for decoding 494 } CXzStatInfo; 495 496 void XzStatInfo_Clear(CXzStatInfo *p); 497 498 /* 499 500 XzDecMt_Decode() 501 SRes: it's combined decoding result. It also is equal to stat->CombinedRes. 502 503 SZ_OK - no error 504 check also output value in (stat->DataAfterEnd) 505 that can show additional possible error 506 507 SZ_ERROR_MEM - Memory allocation error 508 SZ_ERROR_NO_ARCHIVE - is not xz archive 509 SZ_ERROR_ARCHIVE - Headers error 510 SZ_ERROR_DATA - Data Error 511 SZ_ERROR_UNSUPPORTED - Unsupported method or method properties 512 SZ_ERROR_CRC - CRC Error 513 SZ_ERROR_INPUT_EOF - it needs more input data 514 SZ_ERROR_WRITE - ISeqOutStream error 515 (SZ_ERROR_READ) - ISeqInStream errors 516 (SZ_ERROR_PROGRESS) - ICompressProgress errors 517 // SZ_ERROR_THREAD - error in multi-threading functions 518 MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function 519 */ 520 521 SRes XzDecMt_Decode(CXzDecMtHandle p, 522 const CXzDecMtProps *props, 523 const UInt64 *outDataSize, // NULL means undefined 524 int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished 525 ISeqOutStreamPtr outStream, 526 // Byte *outBuf, size_t *outBufSize, 527 ISeqInStreamPtr inStream, 528 // const Byte *inData, size_t inDataSize, 529 CXzStatInfo *stat, // out: decoding results and statistics 530 int *isMT, // out: 0 means that ST (Single-Thread) version was used 531 // 1 means that MT (Multi-Thread) version was used 532 ICompressProgressPtr progress); 533 534 EXTERN_C_END 535 536 #endif 537