1*01826a49SYabin Cui /* ******************************************************************
2*01826a49SYabin Cui * bitstream
3*01826a49SYabin Cui * Part of FSE library
4*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
5*01826a49SYabin Cui *
6*01826a49SYabin Cui * You can contact the author at :
7*01826a49SYabin Cui * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8*01826a49SYabin Cui *
9*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
10*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
12*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
13*01826a49SYabin Cui ****************************************************************** */
14*01826a49SYabin Cui #ifndef BITSTREAM_H_MODULE
15*01826a49SYabin Cui #define BITSTREAM_H_MODULE
16*01826a49SYabin Cui
17*01826a49SYabin Cui #if defined (__cplusplus)
18*01826a49SYabin Cui extern "C" {
19*01826a49SYabin Cui #endif
20*01826a49SYabin Cui /*
21*01826a49SYabin Cui * This API consists of small unitary functions, which must be inlined for best performance.
22*01826a49SYabin Cui * Since link-time-optimization is not available for all compilers,
23*01826a49SYabin Cui * these functions are defined into a .h to be included.
24*01826a49SYabin Cui */
25*01826a49SYabin Cui
26*01826a49SYabin Cui /*-****************************************
27*01826a49SYabin Cui * Dependencies
28*01826a49SYabin Cui ******************************************/
29*01826a49SYabin Cui #include "mem.h" /* unaligned access routines */
30*01826a49SYabin Cui #include "compiler.h" /* UNLIKELY() */
31*01826a49SYabin Cui #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
32*01826a49SYabin Cui #include "error_private.h" /* error codes and messages */
33*01826a49SYabin Cui #include "bits.h" /* ZSTD_highbit32 */
34*01826a49SYabin Cui
35*01826a49SYabin Cui
36*01826a49SYabin Cui /*=========================================
37*01826a49SYabin Cui * Target specific
38*01826a49SYabin Cui =========================================*/
39*01826a49SYabin Cui #ifndef ZSTD_NO_INTRINSICS
40*01826a49SYabin Cui # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
41*01826a49SYabin Cui # include <immintrin.h> /* support for bextr (experimental)/bzhi */
42*01826a49SYabin Cui # elif defined(__ICCARM__)
43*01826a49SYabin Cui # include <intrinsics.h>
44*01826a49SYabin Cui # endif
45*01826a49SYabin Cui #endif
46*01826a49SYabin Cui
47*01826a49SYabin Cui #define STREAM_ACCUMULATOR_MIN_32 25
48*01826a49SYabin Cui #define STREAM_ACCUMULATOR_MIN_64 57
49*01826a49SYabin Cui #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
50*01826a49SYabin Cui
51*01826a49SYabin Cui
52*01826a49SYabin Cui /*-******************************************
53*01826a49SYabin Cui * bitStream encoding API (write forward)
54*01826a49SYabin Cui ********************************************/
55*01826a49SYabin Cui /* bitStream can mix input from multiple sources.
56*01826a49SYabin Cui * A critical property of these streams is that they encode and decode in **reverse** direction.
57*01826a49SYabin Cui * So the first bit sequence you add will be the last to be read, like a LIFO stack.
58*01826a49SYabin Cui */
59*01826a49SYabin Cui typedef struct {
60*01826a49SYabin Cui size_t bitContainer;
61*01826a49SYabin Cui unsigned bitPos;
62*01826a49SYabin Cui char* startPtr;
63*01826a49SYabin Cui char* ptr;
64*01826a49SYabin Cui char* endPtr;
65*01826a49SYabin Cui } BIT_CStream_t;
66*01826a49SYabin Cui
67*01826a49SYabin Cui MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
68*01826a49SYabin Cui MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
69*01826a49SYabin Cui MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
70*01826a49SYabin Cui MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
71*01826a49SYabin Cui
72*01826a49SYabin Cui /* Start with initCStream, providing the size of buffer to write into.
73*01826a49SYabin Cui * bitStream will never write outside of this buffer.
74*01826a49SYabin Cui * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
75*01826a49SYabin Cui *
76*01826a49SYabin Cui * bits are first added to a local register.
77*01826a49SYabin Cui * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
78*01826a49SYabin Cui * Writing data into memory is an explicit operation, performed by the flushBits function.
79*01826a49SYabin Cui * Hence keep track how many bits are potentially stored into local register to avoid register overflow.
80*01826a49SYabin Cui * After a flushBits, a maximum of 7 bits might still be stored into local register.
81*01826a49SYabin Cui *
82*01826a49SYabin Cui * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
83*01826a49SYabin Cui *
84*01826a49SYabin Cui * Last operation is to close the bitStream.
85*01826a49SYabin Cui * The function returns the final size of CStream in bytes.
86*01826a49SYabin Cui * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
87*01826a49SYabin Cui */
88*01826a49SYabin Cui
89*01826a49SYabin Cui
90*01826a49SYabin Cui /*-********************************************
91*01826a49SYabin Cui * bitStream decoding API (read backward)
92*01826a49SYabin Cui **********************************************/
93*01826a49SYabin Cui typedef size_t BitContainerType;
94*01826a49SYabin Cui typedef struct {
95*01826a49SYabin Cui BitContainerType bitContainer;
96*01826a49SYabin Cui unsigned bitsConsumed;
97*01826a49SYabin Cui const char* ptr;
98*01826a49SYabin Cui const char* start;
99*01826a49SYabin Cui const char* limitPtr;
100*01826a49SYabin Cui } BIT_DStream_t;
101*01826a49SYabin Cui
102*01826a49SYabin Cui typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
103*01826a49SYabin Cui BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
104*01826a49SYabin Cui BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
105*01826a49SYabin Cui BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
106*01826a49SYabin Cui } BIT_DStream_status; /* result of BIT_reloadDStream() */
107*01826a49SYabin Cui
108*01826a49SYabin Cui MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
109*01826a49SYabin Cui MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
110*01826a49SYabin Cui MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
111*01826a49SYabin Cui MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
112*01826a49SYabin Cui
113*01826a49SYabin Cui
114*01826a49SYabin Cui /* Start by invoking BIT_initDStream().
115*01826a49SYabin Cui * A chunk of the bitStream is then stored into a local register.
116*01826a49SYabin Cui * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
117*01826a49SYabin Cui * You can then retrieve bitFields stored into the local register, **in reverse order**.
118*01826a49SYabin Cui * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
119*01826a49SYabin Cui * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
120*01826a49SYabin Cui * Otherwise, it can be less than that, so proceed accordingly.
121*01826a49SYabin Cui * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
122*01826a49SYabin Cui */
123*01826a49SYabin Cui
124*01826a49SYabin Cui
125*01826a49SYabin Cui /*-****************************************
126*01826a49SYabin Cui * unsafe API
127*01826a49SYabin Cui ******************************************/
128*01826a49SYabin Cui MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
129*01826a49SYabin Cui /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
130*01826a49SYabin Cui
131*01826a49SYabin Cui MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
132*01826a49SYabin Cui /* unsafe version; does not check buffer overflow */
133*01826a49SYabin Cui
134*01826a49SYabin Cui MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
135*01826a49SYabin Cui /* faster, but works only if nbBits >= 1 */
136*01826a49SYabin Cui
137*01826a49SYabin Cui /*===== Local Constants =====*/
138*01826a49SYabin Cui static const unsigned BIT_mask[] = {
139*01826a49SYabin Cui 0, 1, 3, 7, 0xF, 0x1F,
140*01826a49SYabin Cui 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
141*01826a49SYabin Cui 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
142*01826a49SYabin Cui 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
143*01826a49SYabin Cui 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
144*01826a49SYabin Cui 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
145*01826a49SYabin Cui #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
146*01826a49SYabin Cui
147*01826a49SYabin Cui /*-**************************************************************
148*01826a49SYabin Cui * bitStream encoding
149*01826a49SYabin Cui ****************************************************************/
150*01826a49SYabin Cui /*! BIT_initCStream() :
151*01826a49SYabin Cui * `dstCapacity` must be > sizeof(size_t)
152*01826a49SYabin Cui * @return : 0 if success,
153*01826a49SYabin Cui * otherwise an error code (can be tested using ERR_isError()) */
BIT_initCStream(BIT_CStream_t * bitC,void * startPtr,size_t dstCapacity)154*01826a49SYabin Cui MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
155*01826a49SYabin Cui void* startPtr, size_t dstCapacity)
156*01826a49SYabin Cui {
157*01826a49SYabin Cui bitC->bitContainer = 0;
158*01826a49SYabin Cui bitC->bitPos = 0;
159*01826a49SYabin Cui bitC->startPtr = (char*)startPtr;
160*01826a49SYabin Cui bitC->ptr = bitC->startPtr;
161*01826a49SYabin Cui bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
162*01826a49SYabin Cui if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
163*01826a49SYabin Cui return 0;
164*01826a49SYabin Cui }
165*01826a49SYabin Cui
BIT_getLowerBits(size_t bitContainer,U32 const nbBits)166*01826a49SYabin Cui FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
167*01826a49SYabin Cui {
168*01826a49SYabin Cui #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
169*01826a49SYabin Cui return _bzhi_u64(bitContainer, nbBits);
170*01826a49SYabin Cui #else
171*01826a49SYabin Cui assert(nbBits < BIT_MASK_SIZE);
172*01826a49SYabin Cui return bitContainer & BIT_mask[nbBits];
173*01826a49SYabin Cui #endif
174*01826a49SYabin Cui }
175*01826a49SYabin Cui
176*01826a49SYabin Cui /*! BIT_addBits() :
177*01826a49SYabin Cui * can add up to 31 bits into `bitC`.
178*01826a49SYabin Cui * Note : does not check for register overflow ! */
BIT_addBits(BIT_CStream_t * bitC,size_t value,unsigned nbBits)179*01826a49SYabin Cui MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
180*01826a49SYabin Cui size_t value, unsigned nbBits)
181*01826a49SYabin Cui {
182*01826a49SYabin Cui DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
183*01826a49SYabin Cui assert(nbBits < BIT_MASK_SIZE);
184*01826a49SYabin Cui assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
185*01826a49SYabin Cui bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
186*01826a49SYabin Cui bitC->bitPos += nbBits;
187*01826a49SYabin Cui }
188*01826a49SYabin Cui
189*01826a49SYabin Cui /*! BIT_addBitsFast() :
190*01826a49SYabin Cui * works only if `value` is _clean_,
191*01826a49SYabin Cui * meaning all high bits above nbBits are 0 */
BIT_addBitsFast(BIT_CStream_t * bitC,size_t value,unsigned nbBits)192*01826a49SYabin Cui MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
193*01826a49SYabin Cui size_t value, unsigned nbBits)
194*01826a49SYabin Cui {
195*01826a49SYabin Cui assert((value>>nbBits) == 0);
196*01826a49SYabin Cui assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
197*01826a49SYabin Cui bitC->bitContainer |= value << bitC->bitPos;
198*01826a49SYabin Cui bitC->bitPos += nbBits;
199*01826a49SYabin Cui }
200*01826a49SYabin Cui
201*01826a49SYabin Cui /*! BIT_flushBitsFast() :
202*01826a49SYabin Cui * assumption : bitContainer has not overflowed
203*01826a49SYabin Cui * unsafe version; does not check buffer overflow */
BIT_flushBitsFast(BIT_CStream_t * bitC)204*01826a49SYabin Cui MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
205*01826a49SYabin Cui {
206*01826a49SYabin Cui size_t const nbBytes = bitC->bitPos >> 3;
207*01826a49SYabin Cui assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
208*01826a49SYabin Cui assert(bitC->ptr <= bitC->endPtr);
209*01826a49SYabin Cui MEM_writeLEST(bitC->ptr, bitC->bitContainer);
210*01826a49SYabin Cui bitC->ptr += nbBytes;
211*01826a49SYabin Cui bitC->bitPos &= 7;
212*01826a49SYabin Cui bitC->bitContainer >>= nbBytes*8;
213*01826a49SYabin Cui }
214*01826a49SYabin Cui
215*01826a49SYabin Cui /*! BIT_flushBits() :
216*01826a49SYabin Cui * assumption : bitContainer has not overflowed
217*01826a49SYabin Cui * safe version; check for buffer overflow, and prevents it.
218*01826a49SYabin Cui * note : does not signal buffer overflow.
219*01826a49SYabin Cui * overflow will be revealed later on using BIT_closeCStream() */
BIT_flushBits(BIT_CStream_t * bitC)220*01826a49SYabin Cui MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
221*01826a49SYabin Cui {
222*01826a49SYabin Cui size_t const nbBytes = bitC->bitPos >> 3;
223*01826a49SYabin Cui assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
224*01826a49SYabin Cui assert(bitC->ptr <= bitC->endPtr);
225*01826a49SYabin Cui MEM_writeLEST(bitC->ptr, bitC->bitContainer);
226*01826a49SYabin Cui bitC->ptr += nbBytes;
227*01826a49SYabin Cui if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
228*01826a49SYabin Cui bitC->bitPos &= 7;
229*01826a49SYabin Cui bitC->bitContainer >>= nbBytes*8;
230*01826a49SYabin Cui }
231*01826a49SYabin Cui
232*01826a49SYabin Cui /*! BIT_closeCStream() :
233*01826a49SYabin Cui * @return : size of CStream, in bytes,
234*01826a49SYabin Cui * or 0 if it could not fit into dstBuffer */
BIT_closeCStream(BIT_CStream_t * bitC)235*01826a49SYabin Cui MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
236*01826a49SYabin Cui {
237*01826a49SYabin Cui BIT_addBitsFast(bitC, 1, 1); /* endMark */
238*01826a49SYabin Cui BIT_flushBits(bitC);
239*01826a49SYabin Cui if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
240*01826a49SYabin Cui return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
241*01826a49SYabin Cui }
242*01826a49SYabin Cui
243*01826a49SYabin Cui
244*01826a49SYabin Cui /*-********************************************************
245*01826a49SYabin Cui * bitStream decoding
246*01826a49SYabin Cui **********************************************************/
247*01826a49SYabin Cui /*! BIT_initDStream() :
248*01826a49SYabin Cui * Initialize a BIT_DStream_t.
249*01826a49SYabin Cui * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
250*01826a49SYabin Cui * `srcSize` must be the *exact* size of the bitStream, in bytes.
251*01826a49SYabin Cui * @return : size of stream (== srcSize), or an errorCode if a problem is detected
252*01826a49SYabin Cui */
BIT_initDStream(BIT_DStream_t * bitD,const void * srcBuffer,size_t srcSize)253*01826a49SYabin Cui MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
254*01826a49SYabin Cui {
255*01826a49SYabin Cui if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
256*01826a49SYabin Cui
257*01826a49SYabin Cui bitD->start = (const char*)srcBuffer;
258*01826a49SYabin Cui bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
259*01826a49SYabin Cui
260*01826a49SYabin Cui if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
261*01826a49SYabin Cui bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
262*01826a49SYabin Cui bitD->bitContainer = MEM_readLEST(bitD->ptr);
263*01826a49SYabin Cui { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
264*01826a49SYabin Cui bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
265*01826a49SYabin Cui if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
266*01826a49SYabin Cui } else {
267*01826a49SYabin Cui bitD->ptr = bitD->start;
268*01826a49SYabin Cui bitD->bitContainer = *(const BYTE*)(bitD->start);
269*01826a49SYabin Cui switch(srcSize)
270*01826a49SYabin Cui {
271*01826a49SYabin Cui case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
272*01826a49SYabin Cui ZSTD_FALLTHROUGH;
273*01826a49SYabin Cui
274*01826a49SYabin Cui case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
275*01826a49SYabin Cui ZSTD_FALLTHROUGH;
276*01826a49SYabin Cui
277*01826a49SYabin Cui case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
278*01826a49SYabin Cui ZSTD_FALLTHROUGH;
279*01826a49SYabin Cui
280*01826a49SYabin Cui case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
281*01826a49SYabin Cui ZSTD_FALLTHROUGH;
282*01826a49SYabin Cui
283*01826a49SYabin Cui case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
284*01826a49SYabin Cui ZSTD_FALLTHROUGH;
285*01826a49SYabin Cui
286*01826a49SYabin Cui case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
287*01826a49SYabin Cui ZSTD_FALLTHROUGH;
288*01826a49SYabin Cui
289*01826a49SYabin Cui default: break;
290*01826a49SYabin Cui }
291*01826a49SYabin Cui { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
292*01826a49SYabin Cui bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
293*01826a49SYabin Cui if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
294*01826a49SYabin Cui }
295*01826a49SYabin Cui bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
296*01826a49SYabin Cui }
297*01826a49SYabin Cui
298*01826a49SYabin Cui return srcSize;
299*01826a49SYabin Cui }
300*01826a49SYabin Cui
BIT_getUpperBits(BitContainerType bitContainer,U32 const start)301*01826a49SYabin Cui FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
302*01826a49SYabin Cui {
303*01826a49SYabin Cui return bitContainer >> start;
304*01826a49SYabin Cui }
305*01826a49SYabin Cui
BIT_getMiddleBits(BitContainerType bitContainer,U32 const start,U32 const nbBits)306*01826a49SYabin Cui FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
307*01826a49SYabin Cui {
308*01826a49SYabin Cui U32 const regMask = sizeof(bitContainer)*8 - 1;
309*01826a49SYabin Cui /* if start > regMask, bitstream is corrupted, and result is undefined */
310*01826a49SYabin Cui assert(nbBits < BIT_MASK_SIZE);
311*01826a49SYabin Cui /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
312*01826a49SYabin Cui * than accessing memory. When bmi2 instruction is not present, we consider
313*01826a49SYabin Cui * such cpus old (pre-Haswell, 2013) and their performance is not of that
314*01826a49SYabin Cui * importance.
315*01826a49SYabin Cui */
316*01826a49SYabin Cui #if defined(__x86_64__) || defined(_M_X86)
317*01826a49SYabin Cui return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
318*01826a49SYabin Cui #else
319*01826a49SYabin Cui return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
320*01826a49SYabin Cui #endif
321*01826a49SYabin Cui }
322*01826a49SYabin Cui
323*01826a49SYabin Cui /*! BIT_lookBits() :
324*01826a49SYabin Cui * Provides next n bits from local register.
325*01826a49SYabin Cui * local register is not modified.
326*01826a49SYabin Cui * On 32-bits, maxNbBits==24.
327*01826a49SYabin Cui * On 64-bits, maxNbBits==56.
328*01826a49SYabin Cui * @return : value extracted */
BIT_lookBits(const BIT_DStream_t * bitD,U32 nbBits)329*01826a49SYabin Cui FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
330*01826a49SYabin Cui {
331*01826a49SYabin Cui /* arbitrate between double-shift and shift+mask */
332*01826a49SYabin Cui #if 1
333*01826a49SYabin Cui /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
334*01826a49SYabin Cui * bitstream is likely corrupted, and result is undefined */
335*01826a49SYabin Cui return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
336*01826a49SYabin Cui #else
337*01826a49SYabin Cui /* this code path is slower on my os-x laptop */
338*01826a49SYabin Cui U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
339*01826a49SYabin Cui return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
340*01826a49SYabin Cui #endif
341*01826a49SYabin Cui }
342*01826a49SYabin Cui
343*01826a49SYabin Cui /*! BIT_lookBitsFast() :
344*01826a49SYabin Cui * unsafe version; only works if nbBits >= 1 */
BIT_lookBitsFast(const BIT_DStream_t * bitD,U32 nbBits)345*01826a49SYabin Cui MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
346*01826a49SYabin Cui {
347*01826a49SYabin Cui U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
348*01826a49SYabin Cui assert(nbBits >= 1);
349*01826a49SYabin Cui return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
350*01826a49SYabin Cui }
351*01826a49SYabin Cui
BIT_skipBits(BIT_DStream_t * bitD,U32 nbBits)352*01826a49SYabin Cui FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
353*01826a49SYabin Cui {
354*01826a49SYabin Cui bitD->bitsConsumed += nbBits;
355*01826a49SYabin Cui }
356*01826a49SYabin Cui
357*01826a49SYabin Cui /*! BIT_readBits() :
358*01826a49SYabin Cui * Read (consume) next n bits from local register and update.
359*01826a49SYabin Cui * Pay attention to not read more than nbBits contained into local register.
360*01826a49SYabin Cui * @return : extracted value. */
BIT_readBits(BIT_DStream_t * bitD,unsigned nbBits)361*01826a49SYabin Cui FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
362*01826a49SYabin Cui {
363*01826a49SYabin Cui size_t const value = BIT_lookBits(bitD, nbBits);
364*01826a49SYabin Cui BIT_skipBits(bitD, nbBits);
365*01826a49SYabin Cui return value;
366*01826a49SYabin Cui }
367*01826a49SYabin Cui
368*01826a49SYabin Cui /*! BIT_readBitsFast() :
369*01826a49SYabin Cui * unsafe version; only works if nbBits >= 1 */
BIT_readBitsFast(BIT_DStream_t * bitD,unsigned nbBits)370*01826a49SYabin Cui MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
371*01826a49SYabin Cui {
372*01826a49SYabin Cui size_t const value = BIT_lookBitsFast(bitD, nbBits);
373*01826a49SYabin Cui assert(nbBits >= 1);
374*01826a49SYabin Cui BIT_skipBits(bitD, nbBits);
375*01826a49SYabin Cui return value;
376*01826a49SYabin Cui }
377*01826a49SYabin Cui
378*01826a49SYabin Cui /*! BIT_reloadDStream_internal() :
379*01826a49SYabin Cui * Simple variant of BIT_reloadDStream(), with two conditions:
380*01826a49SYabin Cui * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
381*01826a49SYabin Cui * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
382*01826a49SYabin Cui */
BIT_reloadDStream_internal(BIT_DStream_t * bitD)383*01826a49SYabin Cui MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
384*01826a49SYabin Cui {
385*01826a49SYabin Cui assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
386*01826a49SYabin Cui bitD->ptr -= bitD->bitsConsumed >> 3;
387*01826a49SYabin Cui assert(bitD->ptr >= bitD->start);
388*01826a49SYabin Cui bitD->bitsConsumed &= 7;
389*01826a49SYabin Cui bitD->bitContainer = MEM_readLEST(bitD->ptr);
390*01826a49SYabin Cui return BIT_DStream_unfinished;
391*01826a49SYabin Cui }
392*01826a49SYabin Cui
393*01826a49SYabin Cui /*! BIT_reloadDStreamFast() :
394*01826a49SYabin Cui * Similar to BIT_reloadDStream(), but with two differences:
395*01826a49SYabin Cui * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
396*01826a49SYabin Cui * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
397*01826a49SYabin Cui * point you must use BIT_reloadDStream() to reload.
398*01826a49SYabin Cui */
BIT_reloadDStreamFast(BIT_DStream_t * bitD)399*01826a49SYabin Cui MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
400*01826a49SYabin Cui {
401*01826a49SYabin Cui if (UNLIKELY(bitD->ptr < bitD->limitPtr))
402*01826a49SYabin Cui return BIT_DStream_overflow;
403*01826a49SYabin Cui return BIT_reloadDStream_internal(bitD);
404*01826a49SYabin Cui }
405*01826a49SYabin Cui
406*01826a49SYabin Cui /*! BIT_reloadDStream() :
407*01826a49SYabin Cui * Refill `bitD` from buffer previously set in BIT_initDStream() .
408*01826a49SYabin Cui * This function is safe, it guarantees it will not never beyond src buffer.
409*01826a49SYabin Cui * @return : status of `BIT_DStream_t` internal register.
410*01826a49SYabin Cui * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
BIT_reloadDStream(BIT_DStream_t * bitD)411*01826a49SYabin Cui FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
412*01826a49SYabin Cui {
413*01826a49SYabin Cui /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
414*01826a49SYabin Cui if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
415*01826a49SYabin Cui static const BitContainerType zeroFilled = 0;
416*01826a49SYabin Cui bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
417*01826a49SYabin Cui /* overflow detected, erroneous scenario or end of stream: no update */
418*01826a49SYabin Cui return BIT_DStream_overflow;
419*01826a49SYabin Cui }
420*01826a49SYabin Cui
421*01826a49SYabin Cui assert(bitD->ptr >= bitD->start);
422*01826a49SYabin Cui
423*01826a49SYabin Cui if (bitD->ptr >= bitD->limitPtr) {
424*01826a49SYabin Cui return BIT_reloadDStream_internal(bitD);
425*01826a49SYabin Cui }
426*01826a49SYabin Cui if (bitD->ptr == bitD->start) {
427*01826a49SYabin Cui /* reached end of bitStream => no update */
428*01826a49SYabin Cui if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
429*01826a49SYabin Cui return BIT_DStream_completed;
430*01826a49SYabin Cui }
431*01826a49SYabin Cui /* start < ptr < limitPtr => cautious update */
432*01826a49SYabin Cui { U32 nbBytes = bitD->bitsConsumed >> 3;
433*01826a49SYabin Cui BIT_DStream_status result = BIT_DStream_unfinished;
434*01826a49SYabin Cui if (bitD->ptr - nbBytes < bitD->start) {
435*01826a49SYabin Cui nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
436*01826a49SYabin Cui result = BIT_DStream_endOfBuffer;
437*01826a49SYabin Cui }
438*01826a49SYabin Cui bitD->ptr -= nbBytes;
439*01826a49SYabin Cui bitD->bitsConsumed -= nbBytes*8;
440*01826a49SYabin Cui bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
441*01826a49SYabin Cui return result;
442*01826a49SYabin Cui }
443*01826a49SYabin Cui }
444*01826a49SYabin Cui
445*01826a49SYabin Cui /*! BIT_endOfDStream() :
446*01826a49SYabin Cui * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
447*01826a49SYabin Cui */
BIT_endOfDStream(const BIT_DStream_t * DStream)448*01826a49SYabin Cui MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
449*01826a49SYabin Cui {
450*01826a49SYabin Cui return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
451*01826a49SYabin Cui }
452*01826a49SYabin Cui
453*01826a49SYabin Cui #if defined (__cplusplus)
454*01826a49SYabin Cui }
455*01826a49SYabin Cui #endif
456*01826a49SYabin Cui
457*01826a49SYabin Cui #endif /* BITSTREAM_H_MODULE */
458