// XpressDecoder.cpp

#include "StdAfx.h"

#include "../../../C/CpuArch.h"
#include "../../../C/RotateDefs.h"

#include "HuffmanDecoder.h"
#include "XpressDecoder.h"

#ifdef MY_CPU_LE_UNALIGN
  #define Z7_XPRESS_DEC_USE_UNALIGNED_COPY
#endif

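/*
  The COPY_* macros below copy match bytes in fixed-size chunks, using the
  widest store available for the target: 16-byte NEON / SSE2 stores or
  UInt64 pairs when unaligned access is allowed, and plain byte pairs
  otherwise.
  COPY_OFFSET_MIN is the smallest match offset for which COPY_CHUNK is used:
  for offsets >= COPY_OFFSET_MIN each load in a chunk reads only bytes that
  are already valid, so overlapping matches are still reproduced correctly.
  The copy loops only test (dest < dest_lim) between chunks, so they can
  write past dest_lim by up to about one chunk; that is presumably the
  "exceed write" that Decode_WithExceedWrite's name refers to.
*/
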
#ifdef Z7_XPRESS_DEC_USE_UNALIGNED_COPY

  #define COPY_CHUNK_SIZE 16

    #define COPY_CHUNK_4_2(dest, src) \
    { \
      ((UInt32 *)(void *)dest)[0] = ((const UInt32 *)(const void *)src)[0]; \
      ((UInt32 *)(void *)dest)[1] = ((const UInt32 *)(const void *)src)[1]; \
      src += 4 * 2; \
      dest += 4 * 2; \
    }

  /* SSE2 doesn't help here with GCC and Clang,
     so we disable SSE2 here. */
#if 0
  #if defined(MY_CPU_AMD64)
    #define Z7_XPRESS_DEC_USE_SSE2
  #elif defined(MY_CPU_X86)
    #if defined(_MSC_VER) && _MSC_VER >= 1300 && defined(_M_IX86_FP) && (_M_IX86_FP >= 2) \
      || defined(__SSE2__) \
      // || 1 == 1  // for debug only
      #define Z7_XPRESS_DEC_USE_SSE2
    #endif
  #endif
#endif

  #if defined(MY_CPU_ARM64)
  #include <arm_neon.h>
    #define COPY_OFFSET_MIN  16
    #define COPY_CHUNK1(dest, src) \
    { \
      vst1q_u8((uint8_t *)(void *)dest, \
      vld1q_u8((const uint8_t *)(const void *)src)); \
      src += 16; \
      dest += 16; \
    }

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK1(dest, src) \
      if (dest >= dest_lim) break; \
      COPY_CHUNK1(dest, src) \
    }

  #elif defined(Z7_XPRESS_DEC_USE_SSE2)
    #include <emmintrin.h> // sse2
    #define COPY_OFFSET_MIN  16

    #define COPY_CHUNK1(dest, src) \
    { \
      _mm_storeu_si128((__m128i *)(void *)dest, \
      _mm_loadu_si128((const __m128i *)(const void *)src)); \
      src += 16; \
      dest += 16; \
    }

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK1(dest, src) \
      if (dest >= dest_lim) break; \
      COPY_CHUNK1(dest, src) \
    }

  #elif defined(MY_CPU_64BIT)
    #define COPY_OFFSET_MIN  8

    #define COPY_CHUNK(dest, src) \
    { \
      ((UInt64 *)(void *)dest)[0] = ((const UInt64 *)(const void *)src)[0]; \
      ((UInt64 *)(void *)dest)[1] = ((const UInt64 *)(const void *)src)[1]; \
      src += 8 * 2; \
      dest += 8 * 2; \
    }

  #else
    #define COPY_OFFSET_MIN  4

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK_4_2(dest, src); \
      COPY_CHUNK_4_2(dest, src); \
    }

  #endif
#endif


#ifndef COPY_CHUNK_SIZE
    #define COPY_OFFSET_MIN  4
    #define COPY_CHUNK_SIZE  8
    #define COPY_CHUNK_2(dest, src) \
    { \
      const Byte a0 = src[0]; \
      const Byte a1 = src[1]; \
      dest[0] = a0; \
      dest[1] = a1; \
      src += 2; \
      dest += 2; \
    }
    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK_2(dest, src) \
      COPY_CHUNK_2(dest, src) \
      COPY_CHUNK_2(dest, src) \
      COPY_CHUNK_2(dest, src) \
    }
#endif


#define COPY_CHUNKS \
{ \
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
  do { COPY_CHUNK(dest, src) } \
  while (dest < dest_lim); \
}


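// Copy a match with offset == 1: the match is a run of the single byte at
// dest[-1]. When unaligned stores are allowed, that byte is broadcast into
// a word and stored 16 bytes per iteration; like the COPY_CHUNK loops, this
// may write somewhat past dest_lim.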
static
Z7_FORCE_INLINE
// Z7_ATTRIB_NO_VECTOR
void CopyMatch_1(Byte *dest, const Byte *dest_lim)
{
      const unsigned b0 = dest[-1];
      {
#if defined(Z7_XPRESS_DEC_USE_UNALIGNED_COPY) && (COPY_CHUNK_SIZE == 16)
        #if defined(MY_CPU_64BIT)
        {
          const UInt64 v64 = (UInt64)b0 * 0x0101010101010101;
          Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
          do
          {
            ((UInt64 *)(void *)dest)[0] = v64;
            ((UInt64 *)(void *)dest)[1] = v64;
            dest += 16;
          }
          while (dest < dest_lim);
        }
        #else
        {
          UInt32 v = b0;
          v |= v << 8;
          v |= v << 16;
          do
          {
            ((UInt32 *)(void *)dest)[0] = v;
            ((UInt32 *)(void *)dest)[1] = v;
            dest += 8;
            ((UInt32 *)(void *)dest)[0] = v;
            ((UInt32 *)(void *)dest)[1] = v;
            dest += 8;
          }
          while (dest < dest_lim);
        }
        #endif
#else
        do
        {
          dest[0] = (Byte)b0;
          dest[1] = (Byte)b0;
          dest += 2;
          dest[0] = (Byte)b0;
          dest[1] = (Byte)b0;
          dest += 2;
        }
        while (dest < dest_lim);
#endif
      }
}


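// Copy a match with offset != 1:
//   offset >= COPY_OFFSET_MIN                            : full-width chunk copy (COPY_CHUNKS)
//   4 <= offset < COPY_OFFSET_MIN (if COPY_OFFSET_MIN > 4): 4-byte copies (COPY_CHUNK_4_2)
//   offset == 2, offset == 3                              : replicate the 2- or 3-byte pattern
// As above, the loops may write a few bytes past dest_lim.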
// (offset != 1)
static
Z7_FORCE_INLINE
// Z7_ATTRIB_NO_VECTOR
void CopyMatch_Non1(Byte *dest, size_t offset, const Byte *dest_lim)
{
  const Byte *src = dest - offset;
  {
    // (COPY_OFFSET_MIN >= 4)
    if (offset >= COPY_OFFSET_MIN)
    {
      COPY_CHUNKS
      // return;
    }
    else
#if (COPY_OFFSET_MIN > 4)
    #if COPY_CHUNK_SIZE < 8
      #error Stop_Compiling_Bad_COPY_CHUNK_SIZE
    #endif
    if (offset >= 4)
    {
      Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
      do
      {
        COPY_CHUNK_4_2(dest, src)
        #if COPY_CHUNK_SIZE != 16
          if (dest >= dest_lim) break;
        #endif
        COPY_CHUNK_4_2(dest, src)
      }
      while (dest < dest_lim);
      // return;
    }
    else
#endif
    {
      // (offset < 4)
      if (offset == 2)
      {
#if defined(Z7_XPRESS_DEC_USE_UNALIGNED_COPY)
        UInt32 w0 = GetUi16(src);
        w0 += w0 << 16;
        do
        {
          SetUi32(dest, w0)
          dest += 4;
        }
        while (dest < dest_lim);
#else
        const unsigned b0 = src[0];
        const Byte b1 = src[1];
        do
        {
          dest[0] = (Byte)b0;
          dest[1] = b1;
          dest += 2;
        }
        while (dest < dest_lim);
#endif
      }
      else // (offset == 3)
      {
        const unsigned b0 = src[0];
#if defined(Z7_XPRESS_DEC_USE_UNALIGNED_COPY)
        const unsigned w1 = GetUi16(src + 1);
        do
        {
          dest[0] = (Byte)b0;
          SetUi16(dest + 1, (UInt16)w1)
          dest += 3;
        }
        while (dest < dest_lim);
#else
        const Byte b1 = src[1];
        const Byte b2 = src[2];
        do
        {
          dest[0] = (Byte)b0;
          dest[1] = b1;
          dest[2] = b2;
          dest += 3;
        }
        while (dest < dest_lim);
#endif
      }
    }
  }
}


namespace NCompress {
namespace NXpress {

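/*
  Bit-stream state used by the decoder:
    Value  : the upcoming input bits, left-aligned in a 32-bit word
             (the next bit to decode is the most significant bit)
    BitPos : number of bits of Value already consumed
  MOVE_POS consumes bits by shifting Value left; BIT_STREAM_NORMALIZE
  refills 16 bits from the input once more than 16 bits have been consumed,
  which keeps BitPos in the range 0 < BitPos <= 16 between symbols.
*/
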
#define BIT_STREAM_NORMALIZE \
    if (BitPos > 16) { \
      if (in >= lim) return S_FALSE; \
      BitPos -= 16; \
      Value |= (UInt32)GetUi16(in) << BitPos; \
      in += 2; }

#define MOVE_POS(bs, numBits) \
    BitPos += (unsigned)numBits; \
    Value <<= numBits; \


static const unsigned kNumHuffBits = 15;
static const unsigned kNumTableBits = 10;
static const unsigned kNumLenBits = 4;
static const unsigned kLenMask = (1 << kNumLenBits) - 1;
static const unsigned kNumPosSlots = 16;
static const unsigned kNumSyms = 256 + (kNumPosSlots << kNumLenBits);
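
/*
  Huffman alphabet layout (kNumSyms == 512):
    0..255   : literal bytes
    256..511 : matches, where
      (sym - 256) >> kNumLenBits  is the number of extra distance bits, and
      (sym - 256) &  kLenMask     is the length slot (match length - 3;
                                    slot 15 means the length continues in
                                    the byte stream).
*/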

HRESULT Decode_WithExceedWrite(const Byte *in, size_t inSize, Byte *out, size_t outSize)
{
  NCompress::NHuffman::CDecoder<kNumHuffBits, kNumSyms, kNumTableBits> huff;

  if (inSize < kNumSyms / 2 + 4)
    return S_FALSE;
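  // The first kNumSyms / 2 = 256 input bytes hold the Huffman code lengths:
  // one 4-bit length per symbol, two symbols per byte, low nibble first.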
  {
    Byte levels[kNumSyms];
    for (unsigned i = 0; i < kNumSyms / 2; i++)
    {
      const unsigned b = in[i];
      levels[(size_t)i * 2    ] = (Byte)(b & 0xf);
      levels[(size_t)i * 2 + 1] = (Byte)(b >> 4);
    }
    if (!huff.Build(levels, NHuffman::k_BuildMode_Full))
      return S_FALSE;
  }

  UInt32 Value;
  unsigned BitPos;  // how many bits in (Value) were processed

  const Byte *lim = in + inSize - 1;  // points to last byte
  in += kNumSyms / 2;
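  // Preload 32 bits: the bit stream is stored as little-endian 16-bit words,
  // and the first word must land in the high half of Value.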
#ifdef MY_CPU_LE_UNALIGN
  Value = GetUi32(in);
  Value = rotlFixed(Value, 16);
#else
  Value = ((UInt32)GetUi16(in) << 16) | GetUi16(in + 2);
#endif

  in += 4;
  BitPos = 0;
  Byte *dest = out;
  const Byte *outLim = out + outSize;

  for (;;)
  {
    unsigned sym;
    Z7_HUFF_DECODE_VAL_IN_HIGH32(sym, &huff, kNumHuffBits, kNumTableBits,
        Value, Z7_HUFF_DECODE_ERROR_SYM_CHECK_NO, {}, MOVE_POS, {}, bs)
    // 0 < BitPos <= 31
    BIT_STREAM_NORMALIZE
    // 0 < BitPos <= 16

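    // The output is full: accept the stream only if it terminates exactly
    // here (symbol 256, no leftover payload bits, and all input consumed).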
    if (dest >= outLim)
      return (sym == 256 && Value == 0 && in == lim + 1) ? S_OK : S_FALSE;

    if (sym < 256)
      *dest++ = (Byte)sym;
    else
    {
      const unsigned distBits = (unsigned)(Byte)sym >> kNumLenBits; // (sym - 256) >> kNumLenBits;
      UInt32 len = (UInt32)(sym & kLenMask);

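      // Length slot 15 (kLenMask) means the length continues in the byte
      // stream: one extra byte added to 15, or, if that byte is 0xff, a
      // full 16-bit length instead. The base length of 3 is added below.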
      if (len == kLenMask)
      {
        if (in > lim)
          return S_FALSE;
        // here we read input bytes out of order relative to the main bit stream (the bits in Value):
        len = *in++;
        if (len == 0xff)
        {
          if (in >= lim)
            return S_FALSE;
          len = GetUi16(in);
          in += 2;
        }
        else
          len += kLenMask;
      }

      len += 3;
      if (len > (size_t)(outLim - dest))
        return S_FALSE;

      if (distBits == 0)
      {
        // d == 1
        if (dest == out)
          return S_FALSE;
        Byte *destTemp = dest;
        dest += len;
        CopyMatch_1(destTemp, dest);
      }
      else
      {
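        // distBits extra bits follow in the bit stream; the decoded
        // distance is (1 << distBits) + extra, so slot n covers offsets
        // [1 << n, (1 << (n + 1)) - 1].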
        unsigned d = (unsigned)(Value >> (32 - distBits));
        MOVE_POS(bs, distBits)
        d += 1u << distBits;
        // 0 < BitPos <= 31
        BIT_STREAM_NORMALIZE
        // 0 < BitPos <= 16
        if (d > (size_t)(dest - out))
          return S_FALSE;
        Byte *destTemp = dest;
        dest += len;
        CopyMatch_Non1(destTemp, d, dest);
      }
    }
  }
}

}}