1*01826a49SYabin Cui /*
2*01826a49SYabin Cui * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui * All rights reserved.
4*01826a49SYabin Cui *
5*01826a49SYabin Cui * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui */
10*01826a49SYabin Cui
11*01826a49SYabin Cui #ifndef ZSTD_BITS_H
12*01826a49SYabin Cui #define ZSTD_BITS_H
13*01826a49SYabin Cui
14*01826a49SYabin Cui #include "mem.h"
15*01826a49SYabin Cui
ZSTD_countTrailingZeros32_fallback(U32 val)16*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17*01826a49SYabin Cui {
18*01826a49SYabin Cui assert(val != 0);
19*01826a49SYabin Cui {
20*01826a49SYabin Cui static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21*01826a49SYabin Cui 30, 22, 20, 15, 25, 17, 4, 8,
22*01826a49SYabin Cui 31, 27, 13, 23, 21, 19, 16, 7,
23*01826a49SYabin Cui 26, 12, 18, 6, 11, 5, 10, 9};
24*01826a49SYabin Cui return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25*01826a49SYabin Cui }
26*01826a49SYabin Cui }
27*01826a49SYabin Cui
ZSTD_countTrailingZeros32(U32 val)28*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29*01826a49SYabin Cui {
30*01826a49SYabin Cui assert(val != 0);
31*01826a49SYabin Cui # if defined(_MSC_VER)
32*01826a49SYabin Cui # if STATIC_BMI2 == 1
33*01826a49SYabin Cui return (unsigned)_tzcnt_u32(val);
34*01826a49SYabin Cui # else
35*01826a49SYabin Cui if (val != 0) {
36*01826a49SYabin Cui unsigned long r;
37*01826a49SYabin Cui _BitScanForward(&r, val);
38*01826a49SYabin Cui return (unsigned)r;
39*01826a49SYabin Cui } else {
40*01826a49SYabin Cui /* Should not reach this code path */
41*01826a49SYabin Cui __assume(0);
42*01826a49SYabin Cui }
43*01826a49SYabin Cui # endif
44*01826a49SYabin Cui # elif defined(__GNUC__) && (__GNUC__ >= 4)
45*01826a49SYabin Cui return (unsigned)__builtin_ctz(val);
46*01826a49SYabin Cui # else
47*01826a49SYabin Cui return ZSTD_countTrailingZeros32_fallback(val);
48*01826a49SYabin Cui # endif
49*01826a49SYabin Cui }
50*01826a49SYabin Cui
ZSTD_countLeadingZeros32_fallback(U32 val)51*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52*01826a49SYabin Cui assert(val != 0);
53*01826a49SYabin Cui {
54*01826a49SYabin Cui static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
55*01826a49SYabin Cui 11, 14, 16, 18, 22, 25, 3, 30,
56*01826a49SYabin Cui 8, 12, 20, 28, 15, 17, 24, 7,
57*01826a49SYabin Cui 19, 27, 23, 6, 26, 5, 4, 31};
58*01826a49SYabin Cui val |= val >> 1;
59*01826a49SYabin Cui val |= val >> 2;
60*01826a49SYabin Cui val |= val >> 4;
61*01826a49SYabin Cui val |= val >> 8;
62*01826a49SYabin Cui val |= val >> 16;
63*01826a49SYabin Cui return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
64*01826a49SYabin Cui }
65*01826a49SYabin Cui }
66*01826a49SYabin Cui
ZSTD_countLeadingZeros32(U32 val)67*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68*01826a49SYabin Cui {
69*01826a49SYabin Cui assert(val != 0);
70*01826a49SYabin Cui # if defined(_MSC_VER)
71*01826a49SYabin Cui # if STATIC_BMI2 == 1
72*01826a49SYabin Cui return (unsigned)_lzcnt_u32(val);
73*01826a49SYabin Cui # else
74*01826a49SYabin Cui if (val != 0) {
75*01826a49SYabin Cui unsigned long r;
76*01826a49SYabin Cui _BitScanReverse(&r, val);
77*01826a49SYabin Cui return (unsigned)(31 - r);
78*01826a49SYabin Cui } else {
79*01826a49SYabin Cui /* Should not reach this code path */
80*01826a49SYabin Cui __assume(0);
81*01826a49SYabin Cui }
82*01826a49SYabin Cui # endif
83*01826a49SYabin Cui # elif defined(__GNUC__) && (__GNUC__ >= 4)
84*01826a49SYabin Cui return (unsigned)__builtin_clz(val);
85*01826a49SYabin Cui # else
86*01826a49SYabin Cui return ZSTD_countLeadingZeros32_fallback(val);
87*01826a49SYabin Cui # endif
88*01826a49SYabin Cui }
89*01826a49SYabin Cui
ZSTD_countTrailingZeros64(U64 val)90*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91*01826a49SYabin Cui {
92*01826a49SYabin Cui assert(val != 0);
93*01826a49SYabin Cui # if defined(_MSC_VER) && defined(_WIN64)
94*01826a49SYabin Cui # if STATIC_BMI2 == 1
95*01826a49SYabin Cui return (unsigned)_tzcnt_u64(val);
96*01826a49SYabin Cui # else
97*01826a49SYabin Cui if (val != 0) {
98*01826a49SYabin Cui unsigned long r;
99*01826a49SYabin Cui _BitScanForward64(&r, val);
100*01826a49SYabin Cui return (unsigned)r;
101*01826a49SYabin Cui } else {
102*01826a49SYabin Cui /* Should not reach this code path */
103*01826a49SYabin Cui __assume(0);
104*01826a49SYabin Cui }
105*01826a49SYabin Cui # endif
106*01826a49SYabin Cui # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107*01826a49SYabin Cui return (unsigned)__builtin_ctzll(val);
108*01826a49SYabin Cui # else
109*01826a49SYabin Cui {
110*01826a49SYabin Cui U32 mostSignificantWord = (U32)(val >> 32);
111*01826a49SYabin Cui U32 leastSignificantWord = (U32)val;
112*01826a49SYabin Cui if (leastSignificantWord == 0) {
113*01826a49SYabin Cui return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114*01826a49SYabin Cui } else {
115*01826a49SYabin Cui return ZSTD_countTrailingZeros32(leastSignificantWord);
116*01826a49SYabin Cui }
117*01826a49SYabin Cui }
118*01826a49SYabin Cui # endif
119*01826a49SYabin Cui }
120*01826a49SYabin Cui
ZSTD_countLeadingZeros64(U64 val)121*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122*01826a49SYabin Cui {
123*01826a49SYabin Cui assert(val != 0);
124*01826a49SYabin Cui # if defined(_MSC_VER) && defined(_WIN64)
125*01826a49SYabin Cui # if STATIC_BMI2 == 1
126*01826a49SYabin Cui return (unsigned)_lzcnt_u64(val);
127*01826a49SYabin Cui # else
128*01826a49SYabin Cui if (val != 0) {
129*01826a49SYabin Cui unsigned long r;
130*01826a49SYabin Cui _BitScanReverse64(&r, val);
131*01826a49SYabin Cui return (unsigned)(63 - r);
132*01826a49SYabin Cui } else {
133*01826a49SYabin Cui /* Should not reach this code path */
134*01826a49SYabin Cui __assume(0);
135*01826a49SYabin Cui }
136*01826a49SYabin Cui # endif
137*01826a49SYabin Cui # elif defined(__GNUC__) && (__GNUC__ >= 4)
138*01826a49SYabin Cui return (unsigned)(__builtin_clzll(val));
139*01826a49SYabin Cui # else
140*01826a49SYabin Cui {
141*01826a49SYabin Cui U32 mostSignificantWord = (U32)(val >> 32);
142*01826a49SYabin Cui U32 leastSignificantWord = (U32)val;
143*01826a49SYabin Cui if (mostSignificantWord == 0) {
144*01826a49SYabin Cui return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145*01826a49SYabin Cui } else {
146*01826a49SYabin Cui return ZSTD_countLeadingZeros32(mostSignificantWord);
147*01826a49SYabin Cui }
148*01826a49SYabin Cui }
149*01826a49SYabin Cui # endif
150*01826a49SYabin Cui }
151*01826a49SYabin Cui
ZSTD_NbCommonBytes(size_t val)152*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
153*01826a49SYabin Cui {
154*01826a49SYabin Cui if (MEM_isLittleEndian()) {
155*01826a49SYabin Cui if (MEM_64bits()) {
156*01826a49SYabin Cui return ZSTD_countTrailingZeros64((U64)val) >> 3;
157*01826a49SYabin Cui } else {
158*01826a49SYabin Cui return ZSTD_countTrailingZeros32((U32)val) >> 3;
159*01826a49SYabin Cui }
160*01826a49SYabin Cui } else { /* Big Endian CPU */
161*01826a49SYabin Cui if (MEM_64bits()) {
162*01826a49SYabin Cui return ZSTD_countLeadingZeros64((U64)val) >> 3;
163*01826a49SYabin Cui } else {
164*01826a49SYabin Cui return ZSTD_countLeadingZeros32((U32)val) >> 3;
165*01826a49SYabin Cui }
166*01826a49SYabin Cui }
167*01826a49SYabin Cui }
168*01826a49SYabin Cui
ZSTD_highbit32(U32 val)169*01826a49SYabin Cui MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
170*01826a49SYabin Cui {
171*01826a49SYabin Cui assert(val != 0);
172*01826a49SYabin Cui return 31 - ZSTD_countLeadingZeros32(val);
173*01826a49SYabin Cui }
174*01826a49SYabin Cui
175*01826a49SYabin Cui /* ZSTD_rotateRight_*():
176*01826a49SYabin Cui * Rotates a bitfield to the right by "count" bits.
177*01826a49SYabin Cui * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
178*01826a49SYabin Cui */
179*01826a49SYabin Cui MEM_STATIC
ZSTD_rotateRight_U64(U64 const value,U32 count)180*01826a49SYabin Cui U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
181*01826a49SYabin Cui assert(count < 64);
182*01826a49SYabin Cui count &= 0x3F; /* for fickle pattern recognition */
183*01826a49SYabin Cui return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
184*01826a49SYabin Cui }
185*01826a49SYabin Cui
186*01826a49SYabin Cui MEM_STATIC
ZSTD_rotateRight_U32(U32 const value,U32 count)187*01826a49SYabin Cui U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
188*01826a49SYabin Cui assert(count < 32);
189*01826a49SYabin Cui count &= 0x1F; /* for fickle pattern recognition */
190*01826a49SYabin Cui return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
191*01826a49SYabin Cui }
192*01826a49SYabin Cui
193*01826a49SYabin Cui MEM_STATIC
ZSTD_rotateRight_U16(U16 const value,U32 count)194*01826a49SYabin Cui U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
195*01826a49SYabin Cui assert(count < 16);
196*01826a49SYabin Cui count &= 0x0F; /* for fickle pattern recognition */
197*01826a49SYabin Cui return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
198*01826a49SYabin Cui }
199*01826a49SYabin Cui
200*01826a49SYabin Cui #endif /* ZSTD_BITS_H */
201