// Compress/HuffmanDecoder.h
#ifndef ZIP7_INC_COMPRESS_HUFFMAN_DECODER_H
#define ZIP7_INC_COMPRESS_HUFFMAN_DECODER_H
#include "../../../C/CpuArch.h"
#include "../../Common/MyTypes.h"
namespace NCompress {
namespace NHuffman {
// const unsigned kNumTableBits_Default = 9;
#if 0 || 0 && defined(MY_CPU_64BIT)
// for debug or optimization:
// 64-BIT limit array can be faster for some compilers.
// for debug or optimization:
#define Z7_HUFF_USE_64BIT_LIMIT
#else
// sizet value variable allows to eliminate some move operation in some compilers.
// for debug or optimization:
// #define Z7_HUFF_USE_SIZET_VALUE
#endif
// v0 must normalized to (32 bits) : (v0 < ((UInt64)1 << 32))
#ifdef Z7_HUFF_USE_64BIT_LIMIT
typedef UInt64 CLimitInt;
typedef UInt64 CValueInt;
// all _limits[*] are normalized and limited by ((UInt64)1 << 32).
// we don't use (v1) in this branch
#define Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) 32
#define Z7_HUFF_TABLE_COMPARE(huf, kNumTableBits, v0, v1) \
((NCompress::NHuffman::CLimitInt)v0 >= (huf)->_limits[0])
#define Z7_HUFF_GET_VAL_FOR_LIMITS(v0, v1, kNumBitsMax, kNumTableBits) (v0)
#define Z7_HUFF_GET_VAL_FOR_TABLE( v0, v1, kNumBitsMax, kNumTableBits) ((v0) >> (32 - kNumTableBits))
#define Z7_HUFF_PRECALC_V1(kNumTableBits, v0, v1)
#else
typedef UInt32 CLimitInt;
typedef
#ifdef Z7_HUFF_USE_SIZET_VALUE
size_t
#else
UInt32
#endif
CValueInt;
// v1 must be precalculated from v0 in this branch
// _limits[0] and (v1) are normalized and limited by (1 << kNumTableBits).
// _limits[non_0] are normalized and limited by (1 << kNumBitsMax).
#define Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) (kNumBitsMax)
#define Z7_HUFF_TABLE_COMPARE(huf, kNumTableBits, v0, v1) \
((NCompress::NHuffman::CLimitInt)v1 >= (huf)->_limits[0])
#define Z7_HUFF_GET_VAL_FOR_LIMITS(v0, v1, kNumBitsMax, kNumTableBits) ((v0) >> (32 - kNumBitsMax))
#define Z7_HUFF_GET_VAL_FOR_TABLE( v0, v1, kNumBitsMax, kNumTableBits) (v1)
#define Z7_HUFF_PRECALC_V1(kNumTableBits, v0, v1) const UInt32 v1 = ((v0) >> (32 - kNumTableBits));
#endif
enum enum_BuildMode
{
k_BuildMode_Partial = 0,
k_BuildMode_Full = 1,
k_BuildMode_Full_or_Empty = 2
};
template
struct CDecoderBase
{
CLimitInt _limits[kNumBitsMax + 2 - kNumTableBits];
UInt32 _poses[kNumBitsMax - kNumTableBits]; // unsigned
union
{
// if defined(MY_CPU_64BIT), we need 64-bit alignment for _symbols.
// if !defined(MY_CPU_64BIT), we need 32-bit alignment for _symbols
// but we provide alignment for _lens.
// _symbols also will be aligned, if _lens are aligned
#if defined(MY_CPU_64BIT)
UInt64
#else
UInt32
#endif
_pad_align[m_NumSymbols < (1u << sizeof(symType) * 8) ? 1 : -1];
/* if symType is Byte, we use 16-bytes padding to avoid cache
bank conflict between _lens and _symbols: */
Byte _lens[(1 << kNumTableBits) + (sizeof(symType) == 1 ? 16 : 0)];
} _u;
symType _symbols[(1 << kNumTableBits) + m_NumSymbols - (kNumTableBits + 1)];
/*
Z7_FORCE_INLINE
bool IsFull() const
{
return _limits[kNumBitsMax - kNumTableBits] ==
(CLimitInt)1u << Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax);
}
Z7_FORCE_INLINE
bool IsEmpty() const
{
return _limits[kNumBitsMax - kNumTableBits] == 0;
}
Z7_FORCE_INLINE
bool Is_Full_or_Empty() const
{
return 0 == (_limits[kNumBitsMax - kNumTableBits] &
~((CLimitInt)1 << Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax)));
}
*/
Z7_FORCE_INLINE
bool Build(const Byte *lens, enum_BuildMode buidMode = k_BuildMode_Partial) throw()
{
unsigned counts[kNumBitsMax + 1];
size_t i;
for (i = 0; i <= kNumBitsMax; i++)
counts[i] = 0;
for (i = 0; i < m_NumSymbols; i++)
counts[lens[i]]++;
UInt32 sum = 0;
for (i = 1; i <= kNumTableBits; i++)
{
sum <<= 1;
sum += counts[i];
}
CLimitInt startPos = (CLimitInt)sum;
_limits[0] =
#ifdef Z7_HUFF_USE_64BIT_LIMIT
startPos << (Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - kNumTableBits);
#else
startPos;
#endif
for (i = kNumTableBits + 1; i <= kNumBitsMax; i++)
{
startPos <<= 1;
_poses[i - (kNumTableBits + 1)] = (UInt32)(startPos - sum);
const unsigned cnt = counts[i];
counts[i] = sum;
sum += cnt;
startPos += cnt;
_limits[i - kNumTableBits] = startPos << (Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - i);
}
_limits[kNumBitsMax + 1 - kNumTableBits] =
(CLimitInt)1 << Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax);
if (buidMode == k_BuildMode_Partial)
{
if (startPos > (1u << kNumBitsMax)) return false;
}
else
{
if (buidMode != k_BuildMode_Full && startPos == 0) return true;
if (startPos != (1u << kNumBitsMax)) return false;
}
size_t sum2 = 0;
for (i = 1; i <= kNumTableBits; i++)
{
const unsigned cnt = counts[i] << (kNumTableBits - i);
counts[i] = (unsigned)sum2 >> (kNumTableBits - i);
memset(_u._lens + sum2, (int)i, cnt);
sum2 += cnt;
}
#ifdef MY_CPU_64BIT
symType4
// UInt64 // for symType = UInt16
// UInt32 // for symType = Byte
#else
UInt32
#endif
v = 0;
for (i = 0; i < m_NumSymbols; i++,
v +=
1
+ ( (UInt32)1 << (sizeof(symType) * 8 * 1))
// 0x00010001 // for symType = UInt16
// 0x00000101 // for symType = Byte
#ifdef MY_CPU_64BIT
+ ((symType4)1 << (sizeof(symType) * 8 * 2))
+ ((symType4)1 << (sizeof(symType) * 8 * 3))
// 0x0001000100010001 // for symType = UInt16
// 0x0000000001010101 // for symType = Byte
#endif
)
{
const unsigned len = lens[i];
if (len == 0)
continue;
const size_t offset = counts[len];
counts[len] = (unsigned)offset + 1;
if (len >= kNumTableBits)
_symbols[offset] = (symType)v;
else
{
Byte *s2 = (Byte *)(void *)_symbols + (offset <<
(kNumTableBits + sizeof(symType) / 2 - len));
Byte *lim = s2 + ((size_t)1 <<
(kNumTableBits + sizeof(symType) / 2 - len));
if (len >= kNumTableBits - 2)
{
*(symType2 *)(void *)(s2 ) = (symType2)v;
*(symType2 *)(void *)(lim - sizeof(symType) * 2) = (symType2)v;
}
else
{
#ifdef MY_CPU_64BIT
symType4 *s = (symType4 *)(void *)s2;
do
{
s[0] = v; s[1] = v; s += 2;
}
while (s != (const symType4 *)(const void *)lim);
#else
symType2 *s = (symType2 *)(void *)s2;
do
{
s[0] = (symType2)v; s[1] = (symType2)v; s += 2;
s[0] = (symType2)v; s[1] = (symType2)v; s += 2;
}
while (s != (const symType2 *)(const void *)lim);
#endif
}
}
}
return true;
}
#define Z7_HUFF_DECODE_ERROR_SYM_CHECK_YES(_numBits_, kNumBitsMax, error_op) if (_numBits_ > kNumBitsMax) { error_op }
#define Z7_HUFF_DECODE_ERROR_SYM_CHECK_NO( _numBits_, kNumBitsMax, error_op)
#define Z7_HUFF_DECODE_BASE_TREE_BRANCH(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
get_val_for_limits, \
check_op, error_op, _numBits_) \
{ \
const NHuffman::CValueInt _val = get_val_for_limits(v0, v1, kNumBitsMax, kNumTableBits); \
_numBits_ = kNumTableBits + 1; \
if ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[1]) \
do { _numBits_++; } \
while ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[_numBits_ - kNumTableBits]); \
check_op(_numBits_, kNumBitsMax, error_op) \
sym = (huf)->_symbols[(/* (UInt32) */ (_val >> ((Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - (unsigned)_numBits_)))) \
- (huf)->_poses[_numBits_ - (kNumTableBits + 1)]]; \
}
/*
Z7_HUFF_DECODE_BASE_TREE_BRANCH(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
get_val_for_limits, \
check_op, error_op, _numBits_) \
*/
#define Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
get_val_for_table, get_val_for_limits, \
check_op, error_op, move_pos_op, after_op, bs) \
{ \
if (Z7_HUFF_TABLE_COMPARE(huf, kNumTableBits, v0, v1)) \
{ \
const NHuffman::CValueInt _val = get_val_for_limits(v0, v1, kNumBitsMax, kNumTableBits); \
size_t _numBits_ = kNumTableBits + 1; \
if ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[1]) \
do { _numBits_++; } \
while ((NCompress::NHuffman::CLimitInt)_val >= (huf)->_limits[_numBits_ - kNumTableBits]); \
check_op(_numBits_, kNumBitsMax, error_op) \
sym = (huf)->_symbols[(/* (UInt32) */ (_val >> ((Z7_HUFF_NUM_LIMIT_BITS(kNumBitsMax) - (unsigned)_numBits_)))) \
- (huf)->_poses[_numBits_ - (kNumTableBits + 1)]]; \
move_pos_op(bs, _numBits_); \
} \
else \
{ \
const size_t _val = get_val_for_table(v0, v1, kNumBitsMax, kNumTableBits); \
const size_t _numBits_ = (huf)->_u._lens[_val]; \
sym = (huf)->_symbols[_val]; \
move_pos_op(bs, _numBits_); \
} \
after_op \
}
#define Z7_HUFF_DECODE_10(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
check_op, error_op, move_pos_op, after_op, bs) \
Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
Z7_HUFF_GET_VAL_FOR_TABLE, \
Z7_HUFF_GET_VAL_FOR_LIMITS, \
check_op, error_op, move_pos_op, after_op, bs) \
#define Z7_HUFF_DECODE_VAL_IN_HIGH32(sym, huf, kNumBitsMax, kNumTableBits, \
v0, \
check_op, error_op, move_pos_op, after_op, bs) \
{ \
Z7_HUFF_PRECALC_V1(kNumTableBits, v0, _v1_temp) \
Z7_HUFF_DECODE_10(sym, huf, kNumBitsMax, kNumTableBits, \
v0, _v1_temp, \
check_op, error_op, move_pos_op, after_op, bs) \
}
#if 0 || defined(Z7_HUFF_USE_64BIT_LIMIT)
// this branch uses bitStream->GetValue_InHigh32bits().
#define Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, \
check_op, error_op, move_pos_op) \
{ \
const UInt32 v0 = (bitStream)->GetValue_InHigh32bits(); \
Z7_HUFF_PRECALC_V1(kNumTableBits, v0, v1); \
Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
Z7_HUFF_GET_VAL_FOR_TABLE, \
Z7_HUFF_GET_VAL_FOR_LIMITS, \
check_op, error_op, move_pos_op, {}, bitStream) \
}
#else
/*
this branch uses bitStream->GetValue().
So we use SIMPLE versions for v0, v1 calculation:
v0 is normalized for kNumBitsMax
v1 is normalized for kNumTableBits
*/
#define Z7_HUFF_GET_VAL_FOR_LIMITS_SIMPLE(v0, v1, kNumBitsMax, kNumTableBits) v0
#define Z7_HUFF_GET_VAL_FOR_TABLE_SIMPLE( v0, v1, kNumBitsMax, kNumTableBits) v1
#define Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op, move_pos_op) \
{ \
const UInt32 v0 = (bitStream)->GetValue(kNumBitsMax); \
const UInt32 v1 = v0 >> (kNumBitsMax - kNumTableBits); \
Z7_HUFF_DECODE_BASE(sym, huf, kNumBitsMax, kNumTableBits, \
v0, v1, \
Z7_HUFF_GET_VAL_FOR_TABLE_SIMPLE, \
Z7_HUFF_GET_VAL_FOR_LIMITS_SIMPLE, \
check_op, error_op, move_pos_op, {}, bitStream) \
}
#endif
#define Z7_HUFF_bitStream_MovePos(bitStream, numBits) (bitStream)->MovePos((unsigned)(numBits))
#define Z7_HUFF_DECODE_1(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op) \
Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op, \
Z7_HUFF_bitStream_MovePos)
// MovePosCheck
#define Z7_HUFF_DECODE_2(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op) \
Z7_HUFF_DECODE_0(sym, huf, kNumBitsMax, kNumTableBits, bitStream, check_op, error_op, \
Z7_HUFF_bitStream_MovePos)
// MovePosCheck
#define Z7_HUFF_DECODE_CHECK(sym, huf, kNumBitsMax, kNumTableBits, bitStream, error_op) \
Z7_HUFF_DECODE_1( sym, huf, kNumBitsMax, kNumTableBits, bitStream, \
Z7_HUFF_DECODE_ERROR_SYM_CHECK_YES, error_op)
template
Z7_FORCE_INLINE
bool Decode2(TBitDecoder *bitStream, unsigned &sym) const
{
Z7_HUFF_DECODE_CHECK(sym, this, kNumBitsMax, kNumTableBits, bitStream,
{ return false; }
)
return true;
}
template
Z7_FORCE_INLINE
bool Decode_SymCheck_MovePosCheck(TBitDecoder *bitStream, unsigned &sym) const
{
Z7_HUFF_DECODE_0(sym, this, kNumBitsMax, kNumTableBits, bitStream,
Z7_HUFF_DECODE_ERROR_SYM_CHECK_YES,
{ return false; },
{ return (bitStream)->MovePosCheck; }
)
}
template
Z7_FORCE_INLINE
unsigned Decode(TBitDecoder *bitStream) const
{
unsigned sym;
Z7_HUFF_DECODE_CHECK(sym, this, kNumBitsMax, kNumTableBits, bitStream,
{ return (unsigned)(int)(Int32)0xffffffff; }
)
return sym;
}
template
Z7_FORCE_INLINE
unsigned DecodeFull(TBitDecoder *bitStream) const
{
/*
const UInt32 val = bitStream->GetValue(kNumBitsMax);
if (val < _limits[kNumTableBits])
{
const unsigned pair = _u._lens[(size_t)(val >> (kNumBitsMax - kNumTableBits))];
bitStream->MovePos(pair & kPairLenMask);
return pair >> kNumPairLenBits;
}
unsigned numBits;
for (numBits = kNumTableBits + 1; val >= _limits[numBits]; numBits++);
bitStream->MovePos(numBits);
return _symbols[_poses[numBits] + (unsigned)
((val - _limits[(size_t)numBits - 1]) >> (kNumBitsMax - numBits))];
*/
unsigned sym;
Z7_HUFF_DECODE_2(sym, this, kNumBitsMax, kNumTableBits, bitStream,
Z7_HUFF_DECODE_ERROR_SYM_CHECK_NO, {}
)
return sym;
}
};
template
struct CDecoder: public CDecoderBase
{};
template
struct CDecoder256: public CDecoderBase
{};
template
class CDecoder7b
{
public:
Byte _lens[1 << 7];
bool Build(const Byte *lens, bool full) throw()
{
const unsigned kNumBitsMax = 7;
unsigned counts[kNumBitsMax + 1];
unsigned _poses[kNumBitsMax + 1];
unsigned _limits[kNumBitsMax + 1];
unsigned i;
for (i = 0; i <= kNumBitsMax; i++)
counts[i] = 0;
for (i = 0; i < numSymbols; i++)
counts[lens[i]]++;
_limits[0] = 0;
const unsigned kMaxValue = 1u << kNumBitsMax;
unsigned startPos = 0;
unsigned sum = 0;
for (i = 1; i <= kNumBitsMax; i++)
{
const unsigned cnt = counts[i];
startPos += cnt << (kNumBitsMax - i);
_limits[i] = startPos;
counts[i] = sum;
_poses[i] = sum;
sum += cnt;
}
counts[0] = sum;
_poses[0] = sum;
if (full)
{
if (startPos != kMaxValue)
return false;
}
else
{
if (startPos > kMaxValue)
return false;
}
for (i = 0; i < numSymbols; i++)
{
const unsigned len = lens[i];
if (len == 0)
continue;
const unsigned offset = counts[len]++;
{
Byte *dest = _lens + _limits[(size_t)len - 1]
+ ((offset - _poses[len]) << (kNumBitsMax - len));
const unsigned num = (unsigned)1 << (kNumBitsMax - len);
const unsigned val = (i << 3) + len;
for (unsigned k = 0; k < num; k++)
dest[k] = (Byte)val;
}
}
if (!full)
{
const unsigned limit = _limits[kNumBitsMax];
const unsigned num = ((unsigned)1 << kNumBitsMax) - limit;
Byte *dest = _lens + limit;
for (unsigned k = 0; k < num; k++)
dest[k] = (Byte)
// (0x1f << 3);
((0x1f << 3) + 0x7);
}
return true;
}
#define Z7_HUFF_DECODER_7B_DECODE(dest, huf, get_val, move_pos, bs) \
{ \
const unsigned pair = huf->_lens[(size_t)get_val(7)]; \
const unsigned numBits = pair & 0x7; \
move_pos(bs, numBits); \
dest = pair >> 3; \
}
template
unsigned Decode(TBitDecoder *bitStream) const
{
const unsigned pair = _lens[(size_t)bitStream->GetValue(7)];
bitStream->MovePos(pair & 0x7);
return pair >> 3;
}
};
}}
#endif