xref: /aosp_15_r20/external/flac/src/libFLAC/include/private/bitmath.h (revision 600f14f40d737144c998e2ec7a483122d3776fbc)
1*600f14f4SXin Li /* libFLAC - Free Lossless Audio Codec library
2*600f14f4SXin Li  * Copyright (C) 2001-2009  Josh Coalson
3*600f14f4SXin Li  * Copyright (C) 2011-2023  Xiph.Org Foundation
4*600f14f4SXin Li  *
5*600f14f4SXin Li  * Redistribution and use in source and binary forms, with or without
6*600f14f4SXin Li  * modification, are permitted provided that the following conditions
7*600f14f4SXin Li  * are met:
8*600f14f4SXin Li  *
9*600f14f4SXin Li  * - Redistributions of source code must retain the above copyright
10*600f14f4SXin Li  * notice, this list of conditions and the following disclaimer.
11*600f14f4SXin Li  *
12*600f14f4SXin Li  * - Redistributions in binary form must reproduce the above copyright
13*600f14f4SXin Li  * notice, this list of conditions and the following disclaimer in the
14*600f14f4SXin Li  * documentation and/or other materials provided with the distribution.
15*600f14f4SXin Li  *
16*600f14f4SXin Li  * - Neither the name of the Xiph.org Foundation nor the names of its
17*600f14f4SXin Li  * contributors may be used to endorse or promote products derived from
18*600f14f4SXin Li  * this software without specific prior written permission.
19*600f14f4SXin Li  *
20*600f14f4SXin Li  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21*600f14f4SXin Li  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22*600f14f4SXin Li  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23*600f14f4SXin Li  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
24*600f14f4SXin Li  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25*600f14f4SXin Li  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26*600f14f4SXin Li  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27*600f14f4SXin Li  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28*600f14f4SXin Li  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29*600f14f4SXin Li  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30*600f14f4SXin Li  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*600f14f4SXin Li  */
32*600f14f4SXin Li 
33*600f14f4SXin Li #ifndef FLAC__PRIVATE__BITMATH_H
34*600f14f4SXin Li #define FLAC__PRIVATE__BITMATH_H
35*600f14f4SXin Li 
36*600f14f4SXin Li #include "FLAC/ordinals.h"
37*600f14f4SXin Li #include "FLAC/assert.h"
38*600f14f4SXin Li 
39*600f14f4SXin Li #include "share/compat.h"
40*600f14f4SXin Li 
41*600f14f4SXin Li #if defined(_MSC_VER)
42*600f14f4SXin Li #include <intrin.h> /* for _BitScanReverse* */
43*600f14f4SXin Li #endif
44*600f14f4SXin Li 
45*600f14f4SXin Li /* Will never be emitted for MSVC, GCC, Intel compilers */
FLAC__clz_soft_uint32(FLAC__uint32 word)46*600f14f4SXin Li static inline uint32_t FLAC__clz_soft_uint32(FLAC__uint32 word)
47*600f14f4SXin Li {
48*600f14f4SXin Li 	static const uint8_t byte_to_unary_table[] = {
49*600f14f4SXin Li 	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
50*600f14f4SXin Li 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
51*600f14f4SXin Li 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52*600f14f4SXin Li 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
53*600f14f4SXin Li 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54*600f14f4SXin Li 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55*600f14f4SXin Li 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56*600f14f4SXin Li 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64*600f14f4SXin Li 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65*600f14f4SXin Li 	};
66*600f14f4SXin Li 
67*600f14f4SXin Li 	return word > 0xffffff ? byte_to_unary_table[word >> 24] :
68*600f14f4SXin Li 		word > 0xffff ? byte_to_unary_table[word >> 16] + 8 :
69*600f14f4SXin Li 		word > 0xff ? byte_to_unary_table[word >> 8] + 16 :
70*600f14f4SXin Li 		byte_to_unary_table[word] + 24;
71*600f14f4SXin Li }
72*600f14f4SXin Li 
FLAC__clz_uint32(FLAC__uint32 v)73*600f14f4SXin Li static inline uint32_t FLAC__clz_uint32(FLAC__uint32 v)
74*600f14f4SXin Li {
75*600f14f4SXin Li /* Never used with input 0 */
76*600f14f4SXin Li 	FLAC__ASSERT(v > 0);
77*600f14f4SXin Li #if defined(__INTEL_COMPILER)
78*600f14f4SXin Li 	return _bit_scan_reverse(v) ^ 31U;
79*600f14f4SXin Li #elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
80*600f14f4SXin Li /* This will translate either to (bsr ^ 31U), clz , ctlz, cntlz, lzcnt depending on
81*600f14f4SXin Li  * -march= setting or to a software routine in exotic machines. */
82*600f14f4SXin Li 	return __builtin_clz(v);
83*600f14f4SXin Li #elif defined(_MSC_VER)
84*600f14f4SXin Li 	{
85*600f14f4SXin Li 		uint32_t idx;
86*600f14f4SXin Li 		_BitScanReverse(&idx, v);
87*600f14f4SXin Li 		return idx ^ 31U;
88*600f14f4SXin Li 	}
89*600f14f4SXin Li #else
90*600f14f4SXin Li 	return FLAC__clz_soft_uint32(v);
91*600f14f4SXin Li #endif
92*600f14f4SXin Li }
93*600f14f4SXin Li 
94*600f14f4SXin Li /* Used when 64-bit bsr/clz is unavailable; can use 32-bit bsr/clz when possible */
FLAC__clz_soft_uint64(FLAC__uint64 word)95*600f14f4SXin Li static inline uint32_t FLAC__clz_soft_uint64(FLAC__uint64 word)
96*600f14f4SXin Li {
97*600f14f4SXin Li 	return (FLAC__uint32)(word>>32) ? FLAC__clz_uint32((FLAC__uint32)(word>>32)) :
98*600f14f4SXin Li 		FLAC__clz_uint32((FLAC__uint32)word) + 32;
99*600f14f4SXin Li }
100*600f14f4SXin Li 
FLAC__clz_uint64(FLAC__uint64 v)101*600f14f4SXin Li static inline uint32_t FLAC__clz_uint64(FLAC__uint64 v)
102*600f14f4SXin Li {
103*600f14f4SXin Li 	/* Never used with input 0 */
104*600f14f4SXin Li 	FLAC__ASSERT(v > 0);
105*600f14f4SXin Li #if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
106*600f14f4SXin Li 	return __builtin_clzll(v);
107*600f14f4SXin Li #elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64))
108*600f14f4SXin Li 	{
109*600f14f4SXin Li 		uint32_t idx;
110*600f14f4SXin Li 		_BitScanReverse64(&idx, v);
111*600f14f4SXin Li 		return idx ^ 63U;
112*600f14f4SXin Li 	}
113*600f14f4SXin Li #else
114*600f14f4SXin Li 	return FLAC__clz_soft_uint64(v);
115*600f14f4SXin Li #endif
116*600f14f4SXin Li }
117*600f14f4SXin Li 
118*600f14f4SXin Li /* These two functions work with input 0 */
FLAC__clz2_uint32(FLAC__uint32 v)119*600f14f4SXin Li static inline uint32_t FLAC__clz2_uint32(FLAC__uint32 v)
120*600f14f4SXin Li {
121*600f14f4SXin Li 	if (!v)
122*600f14f4SXin Li 		return 32;
123*600f14f4SXin Li 	return FLAC__clz_uint32(v);
124*600f14f4SXin Li }
125*600f14f4SXin Li 
FLAC__clz2_uint64(FLAC__uint64 v)126*600f14f4SXin Li static inline uint32_t FLAC__clz2_uint64(FLAC__uint64 v)
127*600f14f4SXin Li {
128*600f14f4SXin Li 	if (!v)
129*600f14f4SXin Li 		return 64;
130*600f14f4SXin Li 	return FLAC__clz_uint64(v);
131*600f14f4SXin Li }
132*600f14f4SXin Li 
133*600f14f4SXin Li /* An example of what FLAC__bitmath_ilog2() computes:
134*600f14f4SXin Li  *
135*600f14f4SXin Li  * ilog2( 0) = assertion failure
136*600f14f4SXin Li  * ilog2( 1) = 0
137*600f14f4SXin Li  * ilog2( 2) = 1
138*600f14f4SXin Li  * ilog2( 3) = 1
139*600f14f4SXin Li  * ilog2( 4) = 2
140*600f14f4SXin Li  * ilog2( 5) = 2
141*600f14f4SXin Li  * ilog2( 6) = 2
142*600f14f4SXin Li  * ilog2( 7) = 2
143*600f14f4SXin Li  * ilog2( 8) = 3
144*600f14f4SXin Li  * ilog2( 9) = 3
145*600f14f4SXin Li  * ilog2(10) = 3
146*600f14f4SXin Li  * ilog2(11) = 3
147*600f14f4SXin Li  * ilog2(12) = 3
148*600f14f4SXin Li  * ilog2(13) = 3
149*600f14f4SXin Li  * ilog2(14) = 3
150*600f14f4SXin Li  * ilog2(15) = 3
151*600f14f4SXin Li  * ilog2(16) = 4
152*600f14f4SXin Li  * ilog2(17) = 4
153*600f14f4SXin Li  * ilog2(18) = 4
154*600f14f4SXin Li  */
155*600f14f4SXin Li 
FLAC__bitmath_ilog2(FLAC__uint32 v)156*600f14f4SXin Li static inline uint32_t FLAC__bitmath_ilog2(FLAC__uint32 v)
157*600f14f4SXin Li {
158*600f14f4SXin Li 	FLAC__ASSERT(v > 0);
159*600f14f4SXin Li #if defined(__INTEL_COMPILER)
160*600f14f4SXin Li 	return _bit_scan_reverse(v);
161*600f14f4SXin Li #elif defined(_MSC_VER)
162*600f14f4SXin Li 	{
163*600f14f4SXin Li 		uint32_t idx;
164*600f14f4SXin Li 		_BitScanReverse(&idx, v);
165*600f14f4SXin Li 		return idx;
166*600f14f4SXin Li 	}
167*600f14f4SXin Li #else
168*600f14f4SXin Li 	return FLAC__clz_uint32(v) ^ 31U;
169*600f14f4SXin Li #endif
170*600f14f4SXin Li }
171*600f14f4SXin Li 
FLAC__bitmath_ilog2_wide(FLAC__uint64 v)172*600f14f4SXin Li static inline uint32_t FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
173*600f14f4SXin Li {
174*600f14f4SXin Li 	FLAC__ASSERT(v > 0);
175*600f14f4SXin Li #if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
176*600f14f4SXin Li 	return __builtin_clzll(v) ^ 63U;
177*600f14f4SXin Li /* Sorry, only supported in x64/Itanium.. and both have fast FPU which makes integer-only encoder pointless */
178*600f14f4SXin Li #elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64))
179*600f14f4SXin Li 	{
180*600f14f4SXin Li 		uint32_t idx;
181*600f14f4SXin Li 		_BitScanReverse64(&idx, v);
182*600f14f4SXin Li 		return idx;
183*600f14f4SXin Li 	}
184*600f14f4SXin Li #else
185*600f14f4SXin Li /*  Brain-damaged compilers will use the fastest possible way that is,
186*600f14f4SXin Li 	de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf)
187*600f14f4SXin Li 	(C) Timothy B. Terriberry ([email protected]) 2001-2009 CC0 (Public domain).
188*600f14f4SXin Li */
189*600f14f4SXin Li 	{
190*600f14f4SXin Li 		static const uint8_t DEBRUIJN_IDX64[64]={
191*600f14f4SXin Li 			0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
192*600f14f4SXin Li 			5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
193*600f14f4SXin Li 			63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
194*600f14f4SXin Li 			62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
195*600f14f4SXin Li 		};
196*600f14f4SXin Li 		v|= v>>1;
197*600f14f4SXin Li 		v|= v>>2;
198*600f14f4SXin Li 		v|= v>>4;
199*600f14f4SXin Li 		v|= v>>8;
200*600f14f4SXin Li 		v|= v>>16;
201*600f14f4SXin Li 		v|= v>>32;
202*600f14f4SXin Li 		v= (v>>1)+1;
203*600f14f4SXin Li 		return DEBRUIJN_IDX64[v*FLAC__U64L(0x218A392CD3D5DBF)>>58&0x3F];
204*600f14f4SXin Li 	}
205*600f14f4SXin Li #endif
206*600f14f4SXin Li }
207*600f14f4SXin Li 
208*600f14f4SXin Li uint32_t FLAC__bitmath_silog2(FLAC__int64 v);
209*600f14f4SXin Li 
210*600f14f4SXin Li #endif
211