xref: /aosp_15_r20/external/libopus/silk/SigProc_FIX.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /***********************************************************************
2*a58d3d2aSXin Li Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without
4*a58d3d2aSXin Li modification, are permitted provided that the following conditions
5*a58d3d2aSXin Li are met:
6*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright notice,
7*a58d3d2aSXin Li this list of conditions and the following disclaimer.
8*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright
9*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the
10*a58d3d2aSXin Li documentation and/or other materials provided with the distribution.
11*a58d3d2aSXin Li - Neither the name of Internet Society, IETF or IETF Trust, nor the
12*a58d3d2aSXin Li names of specific contributors, may be used to endorse or promote
13*a58d3d2aSXin Li products derived from this software without specific prior written
14*a58d3d2aSXin Li permission.
15*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16*a58d3d2aSXin Li AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17*a58d3d2aSXin Li IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18*a58d3d2aSXin Li ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19*a58d3d2aSXin Li LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20*a58d3d2aSXin Li CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21*a58d3d2aSXin Li SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22*a58d3d2aSXin Li INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23*a58d3d2aSXin Li CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24*a58d3d2aSXin Li ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25*a58d3d2aSXin Li POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li ***********************************************************************/
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifndef SILK_SIGPROC_FIX_H
29*a58d3d2aSXin Li #define SILK_SIGPROC_FIX_H
30*a58d3d2aSXin Li 
31*a58d3d2aSXin Li #ifdef  __cplusplus
32*a58d3d2aSXin Li extern "C"
33*a58d3d2aSXin Li {
34*a58d3d2aSXin Li #endif
35*a58d3d2aSXin Li 
36*a58d3d2aSXin Li /*#define silk_MACRO_COUNT */          /* Used to enable WMOPS counting */
37*a58d3d2aSXin Li 
38*a58d3d2aSXin Li #define SILK_MAX_ORDER_LPC            24            /* Max order of the LPC analysis in silk_schur() and silk_k2a() */
39*a58d3d2aSXin Li 
40*a58d3d2aSXin Li #include <string.h>                                 /* for memset(), memcpy(), memmove() */
41*a58d3d2aSXin Li #include "typedef.h"
42*a58d3d2aSXin Li #include "resampler_structs.h"
43*a58d3d2aSXin Li #include "macros.h"
44*a58d3d2aSXin Li #include "cpu_support.h"
45*a58d3d2aSXin Li 
46*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
47*a58d3d2aSXin Li #include "x86/SigProc_FIX_sse.h"
48*a58d3d2aSXin Li #endif
49*a58d3d2aSXin Li 
50*a58d3d2aSXin Li #if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
51*a58d3d2aSXin Li #include "arm/biquad_alt_arm.h"
52*a58d3d2aSXin Li #include "arm/LPC_inv_pred_gain_arm.h"
53*a58d3d2aSXin Li #endif
54*a58d3d2aSXin Li 
55*a58d3d2aSXin Li /********************************************************************/
56*a58d3d2aSXin Li /*                    SIGNAL PROCESSING FUNCTIONS                   */
57*a58d3d2aSXin Li /********************************************************************/
58*a58d3d2aSXin Li 
59*a58d3d2aSXin Li /*!
60*a58d3d2aSXin Li  * Initialize/reset the resampler state for a given pair of input/output sampling rates.
 * Returns an opus_int.
 * NOTE(review): the meaning of the return value (presumably 0 on success) is not
 * stated in this header - confirm against the implementation.
61*a58d3d2aSXin Li  */
62*a58d3d2aSXin Li opus_int silk_resampler_init(
63*a58d3d2aSXin Li     silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
64*a58d3d2aSXin Li     opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)                                    */
65*a58d3d2aSXin Li     opus_int32                  Fs_Hz_out,          /* I    Output sampling rate (Hz)                                   */
66*a58d3d2aSXin Li     opus_int                    forEnc              /* I    If 1: encoder; if 0: decoder                                */
67*a58d3d2aSXin Li );
68*a58d3d2aSXin Li 
69*a58d3d2aSXin Li /*!
70*a58d3d2aSXin Li  * Resampler: convert from one sampling rate to another.
 * Reads inLen samples from in[] and writes the converted signal to out[],
 * using and updating the resampler state S.
 * NOTE(review): the meaning of the opus_int return value (presumably 0 on success)
 * and the required capacity of out[] are not stated in this header - confirm.
71*a58d3d2aSXin Li  */
72*a58d3d2aSXin Li opus_int silk_resampler(
73*a58d3d2aSXin Li     silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
74*a58d3d2aSXin Li     opus_int16                  out[],              /* O    Output signal                                               */
75*a58d3d2aSXin Li     const opus_int16            in[],               /* I    Input signal                                                */
76*a58d3d2aSXin Li     opus_int32                  inLen               /* I    Number of input samples                                     */
77*a58d3d2aSXin Li );
78*a58d3d2aSXin Li 
79*a58d3d2aSXin Li /*!
80*a58d3d2aSXin Li  * Downsample 2x, mediocre quality
81*a58d3d2aSXin Li  */
82*a58d3d2aSXin Li void silk_resampler_down2(
83*a58d3d2aSXin Li     opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */
84*a58d3d2aSXin Li     opus_int16                  *out,               /* O    Output signal [ floor(inLen/2) ]                            */
85*a58d3d2aSXin Li     const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
86*a58d3d2aSXin Li     opus_int32                  inLen               /* I    Number of input samples                                     */
87*a58d3d2aSXin Li );
88*a58d3d2aSXin Li 
89*a58d3d2aSXin Li /*!
90*a58d3d2aSXin Li  * Downsample by a factor 2/3, low quality
 * (every 3 input samples yield 2 output samples, see the size of out[]).
91*a58d3d2aSXin Li  */
92*a58d3d2aSXin Li void silk_resampler_down2_3(
93*a58d3d2aSXin Li     opus_int32                  *S,                 /* I/O  State vector [ 6 ]                                          */
94*a58d3d2aSXin Li     opus_int16                  *out,               /* O    Output signal [ floor(2*inLen/3) ]                          */
95*a58d3d2aSXin Li     const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
96*a58d3d2aSXin Li     opus_int32                  inLen               /* I    Number of input samples                                     */
97*a58d3d2aSXin Li );
98*a58d3d2aSXin Li 
99*a58d3d2aSXin Li /*!
100*a58d3d2aSXin Li  * Second order ARMA filter;
101*a58d3d2aSXin Li  * slower than biquad() but uses more precise (Q28) coefficients,
102*a58d3d2aSXin Li  * can handle (slowly) varying coefficients.
 * Filters the signal in in[] into out[], keeping the filter memory in S.
103*a58d3d2aSXin Li  */
104*a58d3d2aSXin Li void silk_biquad_alt_stride1(
105*a58d3d2aSXin Li     const opus_int16            *in,                /* I     input signal                                               */
106*a58d3d2aSXin Li     const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
107*a58d3d2aSXin Li     const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
108*a58d3d2aSXin Li     opus_int32                  *S,                 /* I/O   State vector [2]                                           */
109*a58d3d2aSXin Li     opus_int16                  *out,               /* O     output signal                                              */
110*a58d3d2aSXin Li     const opus_int32            len                 /* I     signal length (must be even)                               */
111*a58d3d2aSXin Li );
112*a58d3d2aSXin Li 
/* Second order ARMA filter, "stride2" counterpart of silk_biquad_alt_stride1():     */
/* same Q28 coefficient layout, but the state vector holds 4 values instead of 2.    */
/* This is the generic version that the silk_biquad_alt_stride2() macro calls when   */
/* OVERRIDE_silk_biquad_alt_stride2 is not defined.                                  */
/* NOTE(review): presumably in[]/out[] hold two interleaved channels - confirm.      */
113*a58d3d2aSXin Li void silk_biquad_alt_stride2_c(
114*a58d3d2aSXin Li     const opus_int16            *in,                /* I     input signal                                               */
115*a58d3d2aSXin Li     const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
116*a58d3d2aSXin Li     const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
117*a58d3d2aSXin Li     opus_int32                  *S,                 /* I/O   State vector [4]                                           */
118*a58d3d2aSXin Li     opus_int16                  *out,               /* O     output signal                                              */
119*a58d3d2aSXin Li     const opus_int32            len                 /* I     signal length (must be even)                               */
120*a58d3d2aSXin Li );
121*a58d3d2aSXin Li 
122*a58d3d2aSXin Li /* Variable order MA prediction error filter: filters in[] into out[] using the d coefficients in B. */
123*a58d3d2aSXin Li void silk_LPC_analysis_filter(
124*a58d3d2aSXin Li     opus_int16                  *out,               /* O    Output signal                                               */
125*a58d3d2aSXin Li     const opus_int16            *in,                /* I    Input signal                                                */
126*a58d3d2aSXin Li     const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [d]                         */
127*a58d3d2aSXin Li     const opus_int32            len,                /* I    Signal length                                               */
128*a58d3d2aSXin Li     const opus_int32            d,                  /* I    Filter order                                                */
129*a58d3d2aSXin Li     int                         arch                /* I    Run-time architecture                                       */
130*a58d3d2aSXin Li );
131*a58d3d2aSXin Li 
132*a58d3d2aSXin Li /* Chirp (bandwidth expand) LP AR filter, 16-bit coefficients; ar[] is modified in place */
133*a58d3d2aSXin Li void silk_bwexpander(
134*a58d3d2aSXin Li     opus_int16                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
135*a58d3d2aSXin Li     const opus_int              d,                  /* I    Length of ar                                                */
136*a58d3d2aSXin Li     opus_int32                  chirp_Q16           /* I    Chirp factor in Q16 (typically in the range 0 to 1)         */
137*a58d3d2aSXin Li );
138*a58d3d2aSXin Li 
139*a58d3d2aSXin Li /* Chirp (bandwidth expand) LP AR filter, 32-bit coefficients; ar[] is modified in place */
140*a58d3d2aSXin Li void silk_bwexpander_32(
141*a58d3d2aSXin Li     opus_int32                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
142*a58d3d2aSXin Li     const opus_int              d,                  /* I    Length of ar                                                */
143*a58d3d2aSXin Li     opus_int32                  chirp_Q16           /* I    Chirp factor in Q16                                         */
144*a58d3d2aSXin Li );
145*a58d3d2aSXin Li 
146*a58d3d2aSXin Li /* Compute inverse of LPC prediction gain, and                           */
147*a58d3d2aSXin Li /* test if LPC coefficients are stable (all poles within unit circle).   */
/* Generic version: the silk_LPC_inverse_pred_gain() macro calls it when */
/* OVERRIDE_silk_LPC_inverse_pred_gain is not defined.                   */
/* NOTE(review): how an unstable filter is signalled through the return  */
/* value is not stated in this header - confirm against the .c file.     */
148*a58d3d2aSXin Li opus_int32 silk_LPC_inverse_pred_gain_c(            /* O   Returns inverse prediction gain in energy domain, Q30        */
149*a58d3d2aSXin Li     const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
150*a58d3d2aSXin Li     const opus_int              order               /* I   Prediction order                                             */
151*a58d3d2aSXin Li );
152*a58d3d2aSXin Li 
153*a58d3d2aSXin Li /* Split signal in two decimated bands using first-order allpass filters: */
/* N input samples produce N/2 low-band and N/2 high-band samples.        */
154*a58d3d2aSXin Li void silk_ana_filt_bank_1(
155*a58d3d2aSXin Li     const opus_int16            *in,                /* I    Input signal [N]                                            */
156*a58d3d2aSXin Li     opus_int32                  *S,                 /* I/O  State vector [2]                                            */
157*a58d3d2aSXin Li     opus_int16                  *outL,              /* O    Low band [N/2]                                              */
158*a58d3d2aSXin Li     opus_int16                  *outH,              /* O    High band [N/2]                                             */
159*a58d3d2aSXin Li     const opus_int32            N                   /* I    Number of input samples                                     */
160*a58d3d2aSXin Li );
161*a58d3d2aSXin Li 
/* Default dispatch for silk_biquad_alt_stride2(): unless OVERRIDE_silk_biquad_alt_stride2 */
/* was defined before this point, 'arch' is evaluated and discarded and the call is        */
/* forwarded to the generic silk_biquad_alt_stride2_c().                                   */
162*a58d3d2aSXin Li #if !defined(OVERRIDE_silk_biquad_alt_stride2)
163*a58d3d2aSXin Li #define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len))
164*a58d3d2aSXin Li #endif
165*a58d3d2aSXin Li 
/* Default dispatch for silk_LPC_inverse_pred_gain(): unless OVERRIDE_silk_LPC_inverse_pred_gain */
/* was defined before this point, 'arch' is evaluated and discarded and the call is              */
/* forwarded to the generic silk_LPC_inverse_pred_gain_c().                                      */
166*a58d3d2aSXin Li #if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
167*a58d3d2aSXin Li #define silk_LPC_inverse_pred_gain(A_Q12, order, arch)     ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order))
168*a58d3d2aSXin Li #endif
169*a58d3d2aSXin Li 
170*a58d3d2aSXin Li /********************************************************************/
171*a58d3d2aSXin Li /*                        SCALAR FUNCTIONS                          */
172*a58d3d2aSXin Li /********************************************************************/
173*a58d3d2aSXin Li 
174*a58d3d2aSXin Li /* Approximation of 128 * log2() (exact inverse of the approximate 2^() in silk_log2lin()) */
175*a58d3d2aSXin Li /* Convert input to a log scale; the factor 128 matches the Q7 input of silk_log2lin()     */
176*a58d3d2aSXin Li opus_int32 silk_lin2log(
177*a58d3d2aSXin Li     const opus_int32            inLin               /* I  input in linear scale                                         */
178*a58d3d2aSXin Li );
179*a58d3d2aSXin Li 
180*a58d3d2aSXin Li /* Approximation of a sigmoid function; per the names, the input is Q5 and the result Q15 */
181*a58d3d2aSXin Li opus_int silk_sigm_Q15(
182*a58d3d2aSXin Li     opus_int                    in_Q5               /* I    Input value, Q5                                             */
183*a58d3d2aSXin Li );
184*a58d3d2aSXin Li 
185*a58d3d2aSXin Li /* Approximation of 2^() (exact inverse of the approximate log2() in silk_lin2log()) */
186*a58d3d2aSXin Li /* Convert input to a linear scale */
187*a58d3d2aSXin Li opus_int32 silk_log2lin(
188*a58d3d2aSXin Li     const opus_int32            inLog_Q7            /* I  input on log scale, Q7                                        */
189*a58d3d2aSXin Li );
190*a58d3d2aSXin Li 
191*a58d3d2aSXin Li /* Compute the sum of squares of a vector of int16s, together with the     */
192*a58d3d2aSXin Li /* number of bits of right shift applied to make it fit in an int32        */
193*a58d3d2aSXin Li void silk_sum_sqr_shift(
194*a58d3d2aSXin Li     opus_int32                  *energy,            /* O   Energy of x, after shifting to the right                     */
195*a58d3d2aSXin Li     opus_int                    *shift,             /* O   Number of bits right shift applied to energy                 */
196*a58d3d2aSXin Li     const opus_int16            *x,                 /* I   Input vector                                                 */
197*a58d3d2aSXin Li     opus_int                    len                 /* I   Length of input vector                                       */
198*a58d3d2aSXin Li );
199*a58d3d2aSXin Li 
200*a58d3d2aSXin Li /* Calculates the reflection coefficients from the correlation sequence    */
201*a58d3d2aSXin Li /* Faster than silk_schur64(), but much less accurate.                     */
202*a58d3d2aSXin Li /* Uses SMLAWB(), requiring armv5E and higher.                             */
203*a58d3d2aSXin Li opus_int32 silk_schur(                              /* O    Returns residual energy                                     */
204*a58d3d2aSXin Li     opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15                         */
205*a58d3d2aSXin Li     const opus_int32            *c,                 /* I    correlations [order+1]                                      */
206*a58d3d2aSXin Li     const opus_int32            order               /* I    prediction order                                            */
207*a58d3d2aSXin Li );
208*a58d3d2aSXin Li 
209*a58d3d2aSXin Li /* Calculates the reflection coefficients from the correlation sequence    */
210*a58d3d2aSXin Li /* Slower than silk_schur(), but more accurate.                            */
211*a58d3d2aSXin Li /* Uses SMULL(), available on armv4                                        */
212*a58d3d2aSXin Li opus_int32 silk_schur64(                            /* O    returns residual energy                                     */
213*a58d3d2aSXin Li     opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
214*a58d3d2aSXin Li     const opus_int32            c[],                /* I    Correlations [order+1]                                      */
215*a58d3d2aSXin Li     opus_int32                  order               /* I    Prediction order                                            */
216*a58d3d2aSXin Li );
217*a58d3d2aSXin Li 
218*a58d3d2aSXin Li /* Step up function, converts 16-bit Q15 reflection coefficients to Q24 prediction coefficients */
219*a58d3d2aSXin Li void silk_k2a(
220*a58d3d2aSXin Li     opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
221*a58d3d2aSXin Li     const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
222*a58d3d2aSXin Li     const opus_int32            order               /* I    Prediction order                                            */
223*a58d3d2aSXin Li );
224*a58d3d2aSXin Li 
225*a58d3d2aSXin Li /* Step up function, converts 32-bit Q16 reflection coefficients to Q24 prediction coefficients */
226*a58d3d2aSXin Li void silk_k2a_Q16(
227*a58d3d2aSXin Li     opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
228*a58d3d2aSXin Li     const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
229*a58d3d2aSXin Li     const opus_int32            order               /* I    Prediction order                                            */
230*a58d3d2aSXin Li );
231*a58d3d2aSXin Li 
232*a58d3d2aSXin Li /* Apply sine window to signal vector: px_win[] = windowed px[].    */
233*a58d3d2aSXin Li /* Window types (win_type):                                         */
234*a58d3d2aSXin Li /*    1 -> sine window from 0 to pi/2                               */
235*a58d3d2aSXin Li /*    2 -> sine window from pi/2 to pi                              */
236*a58d3d2aSXin Li /* Every other sample of window is linearly interpolated, for speed */
237*a58d3d2aSXin Li void silk_apply_sine_window(
238*a58d3d2aSXin Li     opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
239*a58d3d2aSXin Li     const opus_int16            px[],               /* I    Pointer to input signal                                     */
240*a58d3d2aSXin Li     const opus_int              win_type,           /* I    Selects a window type                                       */
241*a58d3d2aSXin Li     const opus_int              length              /* I    Window length, multiple of 4                                */
242*a58d3d2aSXin Li );
243*a58d3d2aSXin Li 
244*a58d3d2aSXin Li /* Compute autocorrelation of inputData for correlationCount taps; *scale receives the scaling of the results */
245*a58d3d2aSXin Li void silk_autocorr(
246*a58d3d2aSXin Li     opus_int32                  *results,           /* O    Result (length correlationCount)                            */
247*a58d3d2aSXin Li     opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
248*a58d3d2aSXin Li     const opus_int16            *inputData,         /* I    Input data to correlate                                     */
249*a58d3d2aSXin Li     const opus_int              inputDataSize,      /* I    Length of input                                             */
250*a58d3d2aSXin Li     const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
251*a58d3d2aSXin Li     int                         arch                /* I    Run-time architecture                                       */
252*a58d3d2aSXin Li );
253*a58d3d2aSXin Li 
/* Fill pitch_lags[] from the lag index and pitch contour index, for the given    */
/* sampling frequency and number of subframes.                                    */
/* NOTE(review): pitch_lags[] is documented as 4 values; whether fewer are        */
/* written when nb_subfr < 4 is not visible in this header - confirm.             */
254*a58d3d2aSXin Li void silk_decode_pitch(
255*a58d3d2aSXin Li     opus_int16                  lagIndex,           /* I    Lag index                                                   */
256*a58d3d2aSXin Li     opus_int8                   contourIndex,       /* I    Pitch contour index (passed by value, so an input)          */
257*a58d3d2aSXin Li     opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
258*a58d3d2aSXin Li     const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
259*a58d3d2aSXin Li     const opus_int              nb_subfr            /* I    number of sub frames                                        */
260*a58d3d2aSXin Li );
261*a58d3d2aSXin Li 
/* Pitch analysis of one frame: produces pitch lag values, the lag index and the  */
/* pitch contour index, and returns a voicing estimate (0 voiced, 1 unvoiced).    */
262*a58d3d2aSXin Li opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
263*a58d3d2aSXin Li     const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
264*a58d3d2aSXin Li     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
265*a58d3d2aSXin Li     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
266*a58d3d2aSXin Li     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
267*a58d3d2aSXin Li     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
268*a58d3d2aSXin Li     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero if unvoiced         */
269*a58d3d2aSXin Li     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
270*a58d3d2aSXin Li     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
271*a58d3d2aSXin Li     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
272*a58d3d2aSXin Li     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
273*a58d3d2aSXin Li     const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
274*a58d3d2aSXin Li     int                         arch                /* I    Run-time architecture                                       */
275*a58d3d2aSXin Li );
276*a58d3d2aSXin Li 
277*a58d3d2aSXin Li /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients.     */
278*a58d3d2aSXin Li /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence, */
/* which is why a_Q16 is an I/O parameter.                                                      */
279*a58d3d2aSXin Li void silk_A2NLSF(
280*a58d3d2aSXin Li     opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
281*a58d3d2aSXin Li     opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
282*a58d3d2aSXin Li     const opus_int              d                   /* I    Filter order (must be even)                                 */
283*a58d3d2aSXin Li );
284*a58d3d2aSXin Li 
285*a58d3d2aSXin Li /* Compute whitening filter coefficients (Q12) from normalized line spectral frequencies (Q15) */
286*a58d3d2aSXin Li void silk_NLSF2A(
287*a58d3d2aSXin Li     opus_int16                  *a_Q12,             /* O    monic whitening filter coefficients in Q12,  [ d ]          */
288*a58d3d2aSXin Li     const opus_int16            *NLSF,              /* I    normalized line spectral frequencies in Q15, [ d ]          */
289*a58d3d2aSXin Li     const opus_int              d,                  /* I    filter order (should be even)                               */
290*a58d3d2aSXin Li     int                         arch                /* I    Run-time architecture                                       */
291*a58d3d2aSXin Li );
292*a58d3d2aSXin Li 
293*a58d3d2aSXin Li /* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
/* a_QIN is I/O: NOTE(review) presumably it may be modified to make the values fit - confirm */
294*a58d3d2aSXin Li void silk_LPC_fit(
295*a58d3d2aSXin Li     opus_int16                  *a_QOUT,            /* O    Output signal                                               */
296*a58d3d2aSXin Li     opus_int32                  *a_QIN,             /* I/O  Input signal                                                */
297*a58d3d2aSXin Li     const opus_int              QOUT,               /* I    Output Q domain                                             */
298*a58d3d2aSXin Li     const opus_int              QIN,                /* I    Input Q domain                                              */
299*a58d3d2aSXin Li     const opus_int              d                   /* I    Filter order                                                */
300*a58d3d2aSXin Li );
301*a58d3d2aSXin Li 
/* Insertion sort of the opus_int32 vector a[] (length L) into increasing order;   */
/* idx[] receives the index vector for the sorted elements. Per the K parameter,   */
/* only the first K positions are guaranteed to be correctly sorted.               */
302*a58d3d2aSXin Li void silk_insertion_sort_increasing(
303*a58d3d2aSXin Li     opus_int32                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
304*a58d3d2aSXin Li     opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
305*a58d3d2aSXin Li     const opus_int              L,                  /* I     Vector length                                              */
306*a58d3d2aSXin Li     const opus_int              K                   /* I     Number of correctly sorted positions                       */
307*a58d3d2aSXin Li );
308*a58d3d2aSXin Li 
/* Insertion sort of the opus_int16 vector a[] (length L) into decreasing order;   */
/* idx[] receives the index vector for the sorted elements. Per the K parameter,   */
/* only the first K positions are guaranteed to be correctly sorted.               */
309*a58d3d2aSXin Li void silk_insertion_sort_decreasing_int16(
310*a58d3d2aSXin Li     opus_int16                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
311*a58d3d2aSXin Li     opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
312*a58d3d2aSXin Li     const opus_int              L,                  /* I     Vector length                                              */
313*a58d3d2aSXin Li     const opus_int              K                   /* I     Number of correctly sorted positions                       */
314*a58d3d2aSXin Li );
315*a58d3d2aSXin Li 
/* Insertion sort of the whole opus_int16 vector a[] (length L) into increasing    */
/* order, in place; unlike the variants above there is no index output and no K.   */
316*a58d3d2aSXin Li void silk_insertion_sort_increasing_all_values_int16(
317*a58d3d2aSXin Li      opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
318*a58d3d2aSXin Li      const opus_int             L                   /* I     Vector length                                              */
319*a58d3d2aSXin Li );
320*a58d3d2aSXin Li 
321*a58d3d2aSXin Li /* NLSF stabilizer, for a single input data vector; NLSF_Q15[] is stabilized in place. */
/* NDeltaMin_Q15[] has L+1 entries, and its last entry (index L) must be >= 1.         */
322*a58d3d2aSXin Li void silk_NLSF_stabilize(
323*a58d3d2aSXin Li           opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
324*a58d3d2aSXin Li     const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
325*a58d3d2aSXin Li     const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
326*a58d3d2aSXin Li );
327*a58d3d2aSXin Li 
328*a58d3d2aSXin Li /* Laroia low complexity NLSF weights: computes one weight per element of the input NLSF vector */
329*a58d3d2aSXin Li void silk_NLSF_VQ_weights_laroia(
330*a58d3d2aSXin Li     opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to weights for the input vector [D]                */
331*a58d3d2aSXin Li     const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
332*a58d3d2aSXin Li     const opus_int              D                   /* I     Input vector dimension (even)                              */
333*a58d3d2aSXin Li );
334*a58d3d2aSXin Li 
335*a58d3d2aSXin Li /* Compute prediction coefficients (A_Q16) and residual energy from input signal */
/* NOTE(review): this header was originally worded "Compute reflection coefficients",   */
/* but the only coefficient output is A_Q16, documented as prediction coefficients -    */
/* confirm which wording is intended.                                                   */
/* NOTE(review): x[] is documented as nb_subfr * ( D + subfr_length ) samples while     */
/* subfr_length is documented as already including the D preceding samples; the two     */
/* comments disagree - confirm the real length against the implementation.              */
336*a58d3d2aSXin Li void silk_burg_modified_c(
337*a58d3d2aSXin Li     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
338*a58d3d2aSXin Li     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
339*a58d3d2aSXin Li     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
340*a58d3d2aSXin Li     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
341*a58d3d2aSXin Li     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
342*a58d3d2aSXin Li     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
343*a58d3d2aSXin Li     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
344*a58d3d2aSXin Li     const opus_int              D,                  /* I    Order                                                       */
345*a58d3d2aSXin Li     int                         arch                /* I    Run-time architecture                                       */
346*a58d3d2aSXin Li );
347*a58d3d2aSXin Li 
348*a58d3d2aSXin Li /* Copy and multiply a vector by a constant: data_out[] receives data_in[] scaled by gain_Q16 */
349*a58d3d2aSXin Li void silk_scale_copy_vector16(
350*a58d3d2aSXin Li     opus_int16                  *data_out,          /* O    Scaled copy of data_in                                      */
351*a58d3d2aSXin Li     const opus_int16            *data_in,           /* I    Vector to copy and scale                                    */
352*a58d3d2aSXin Li     opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
353*a58d3d2aSXin Li     const opus_int              dataSize            /* I    Length                                                      */
354*a58d3d2aSXin Li );
355*a58d3d2aSXin Li 
356*a58d3d2aSXin Li /* Some of the LTP related functions require Q26 to work. */
/* Scales data1[] in place by gain_Q26 (input Q0, output Q18 per the parameter comment). */
357*a58d3d2aSXin Li void silk_scale_vector32_Q26_lshift_18(
358*a58d3d2aSXin Li     opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
359*a58d3d2aSXin Li     opus_int32                  gain_Q26,           /* I    Q26                                                         */
360*a58d3d2aSXin Li     opus_int                    dataSize            /* I    length                                                      */
361*a58d3d2aSXin Li );
362*a58d3d2aSXin Li 
363*a58d3d2aSXin Li /********************************************************************/
364*a58d3d2aSXin Li /*                        INLINE ARM MATH                           */
365*a58d3d2aSXin Li /********************************************************************/
366*a58d3d2aSXin Li 
367*a58d3d2aSXin Li /* Returns sum( inVec1[i] * inVec2[i] ) over the len elements, as an opus_int32 */
368*a58d3d2aSXin Li 
369*a58d3d2aSXin Li opus_int32 silk_inner_prod_aligned(
370*a58d3d2aSXin Li     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
371*a58d3d2aSXin Li     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
372*a58d3d2aSXin Li     const opus_int              len,                /*    I vector lengths                                              */
373*a58d3d2aSXin Li     int                         arch                /*    I Run-time architecture                                       */
374*a58d3d2aSXin Li );
375*a58d3d2aSXin Li 
376*a58d3d2aSXin Li 
377*a58d3d2aSXin Li opus_int32 silk_inner_prod_aligned_scale(
378*a58d3d2aSXin Li     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
379*a58d3d2aSXin Li     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
380*a58d3d2aSXin Li     const opus_int              scale,              /*    I number of bits to shift                                     */
381*a58d3d2aSXin Li     const opus_int              len                 /*    I vector lengths                                              */
382*a58d3d2aSXin Li );
383*a58d3d2aSXin Li 
384*a58d3d2aSXin Li opus_int64 silk_inner_prod16_c(
385*a58d3d2aSXin Li     const opus_int16            *inVec1,            /*    I input vector 1                                              */
386*a58d3d2aSXin Li     const opus_int16            *inVec2,            /*    I input vector 2                                              */
387*a58d3d2aSXin Li     const opus_int              len                 /*    I vector lengths                                              */
388*a58d3d2aSXin Li );
389*a58d3d2aSXin Li 
390*a58d3d2aSXin Li /********************************************************************/
391*a58d3d2aSXin Li /*                                MACROS                            */
392*a58d3d2aSXin Li /********************************************************************/
393*a58d3d2aSXin Li 
394*a58d3d2aSXin Li /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
395*a58d3d2aSXin Li    left. Output is 32bit int.
396*a58d3d2aSXin Li    Note: contemporary compilers recognize the C expression below and
397*a58d3d2aSXin Li    compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
silk_ROR32(opus_int32 a32,opus_int rot)398*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
399*a58d3d2aSXin Li {
400*a58d3d2aSXin Li     opus_uint32 x = (opus_uint32) a32;
401*a58d3d2aSXin Li     opus_uint32 r = (opus_uint32) rot;
402*a58d3d2aSXin Li     opus_uint32 m = (opus_uint32) -rot;
403*a58d3d2aSXin Li     if( rot == 0 ) {
404*a58d3d2aSXin Li         return a32;
405*a58d3d2aSXin Li     } else if( rot < 0 ) {
406*a58d3d2aSXin Li         return (opus_int32) ((x << m) | (x >> (32 - m)));
407*a58d3d2aSXin Li     } else {
408*a58d3d2aSXin Li         return (opus_int32) ((x << (32 - r)) | (x >> r));
409*a58d3d2aSXin Li     }
410*a58d3d2aSXin Li }
411*a58d3d2aSXin Li 
412*a58d3d2aSXin Li /* Allocate opus_int16 aligned to 4-byte memory address */
/* Expands to a GCC-style aligned(4) attribute when EMBEDDED_ARM evaluates to
   non-zero, and to nothing otherwise. */
413*a58d3d2aSXin Li #if EMBEDDED_ARM
414*a58d3d2aSXin Li #define silk_DWORD_ALIGN __attribute__((aligned(4)))
415*a58d3d2aSXin Li #else
416*a58d3d2aSXin Li #define silk_DWORD_ALIGN
417*a58d3d2aSXin Li #endif
418*a58d3d2aSXin Li 
419*a58d3d2aSXin Li /* Useful Macros that can be adjusted to other platforms */
/* Thin wrappers around the C library routines of the same name. Note that the
   second parameter of silk_memset is spelled 'src' but is forwarded as
   memset's fill value, not as a source pointer. */
420*a58d3d2aSXin Li #define silk_memcpy(dest, src, size)        memcpy((dest), (src), (size))
421*a58d3d2aSXin Li #define silk_memset(dest, src, size)        memset((dest), (src), (size))
422*a58d3d2aSXin Li #define silk_memmove(dest, src, size)       memmove((dest), (src), (size))
423*a58d3d2aSXin Li 
424*a58d3d2aSXin Li /* Fixed point macros */
425*a58d3d2aSXin Li 
426*a58d3d2aSXin Li /* (a32 * b32); the result has to fit in a 32-bit int */
427*a58d3d2aSXin Li #define silk_MUL(a32, b32)                  ((a32) * (b32))
428*a58d3d2aSXin Li 
429*a58d3d2aSXin Li /* (a32 * b32); the result has to fit in a 32-bit uint. Same expansion as silk_MUL. */
430*a58d3d2aSXin Li #define silk_MUL_uint(a32, b32)             silk_MUL(a32, b32)
431*a58d3d2aSXin Li 
432*a58d3d2aSXin Li /* a32 + (b32 * c32); the result has to fit in a 32-bit int. The addition goes through silk_ADD32. */
433*a58d3d2aSXin Li #define silk_MLA(a32, b32, c32)             silk_ADD32((a32),((b32) * (c32)))
434*a58d3d2aSXin Li 
435*a58d3d2aSXin Li /* a32 + (b32 * c32); the result has to fit in a 32-bit uint. Same expansion as silk_MLA. */
436*a58d3d2aSXin Li #define silk_MLA_uint(a32, b32, c32)        silk_MLA(a32, b32, c32)
437*a58d3d2aSXin Li 
438*a58d3d2aSXin Li /* ((a32 >> 16)  * (b32 >> 16)); the result has to fit in a 32-bit int */
439*a58d3d2aSXin Li #define silk_SMULTT(a32, b32)               (((a32) >> 16) * ((b32) >> 16))
440*a58d3d2aSXin Li 
441*a58d3d2aSXin Li /* a32 + ((b32 >> 16)  * (c32 >> 16)); the result has to fit in a 32-bit int */
442*a58d3d2aSXin Li #define silk_SMLATT(a32, b32, c32)          silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
443*a58d3d2aSXin Li 
/* a64 + (b16 * c16): the product is formed in 32 bits, widened to 64 bits and
   accumulated through silk_ADD64 */
444*a58d3d2aSXin Li #define silk_SMLALBB(a64, b16, c16)         silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
445*a58d3d2aSXin Li 
446*a58d3d2aSXin Li /* (a32 * b32) as a 64-bit product. Only a32 is cast explicitly; a 32-bit b32
   is widened by the usual arithmetic conversions. */
447*a58d3d2aSXin Li #define silk_SMULL(a32, b32)                ((opus_int64)(a32) * /*(opus_int64)*/(b32))
448*a58d3d2aSXin Li 
449*a58d3d2aSXin Li /* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
450*a58d3d2aSXin Li    (just standard two's complement implementation-specific behaviour) */
silk_ADD32_ovflw(opus_int32 a,opus_int32 b)451*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_ADD32_ovflw(opus_int32 a, opus_int32 b) {
452*a58d3d2aSXin Li     opus_int32  _c;
453*a58d3d2aSXin Li     __builtin_add_overflow(a, b, &_c);
454*a58d3d2aSXin Li     return _c;
455*a58d3d2aSXin Li }
456*a58d3d2aSXin Li 
457*a58d3d2aSXin Li /* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
458*a58d3d2aSXin Li    (just standard two's complement implementation-specific behaviour) */
silk_SUB32_ovflw(opus_int32 a,opus_int32 b)459*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SUB32_ovflw(opus_int32 a, opus_int32 b) {
460*a58d3d2aSXin Li     opus_int32  _c;
461*a58d3d2aSXin Li     __builtin_sub_overflow(a, b, &_c);
462*a58d3d2aSXin Li     return _c;
463*a58d3d2aSXin Li }
464*a58d3d2aSXin Li 
465*a58d3d2aSXin Li /* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
466*a58d3d2aSXin Li /* .. also ignoring multiply overflows; caller has comment about this happening occasionally */
silk_MLA_ovflw(opus_int32 a,opus_int32 b,opus_int32 c)467*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_MLA_ovflw(opus_int32 a, opus_int32 b, opus_int32 c) {
468*a58d3d2aSXin Li     opus_int32 _d, _e;
469*a58d3d2aSXin Li     __builtin_mul_overflow(b, c, &_d);
470*a58d3d2aSXin Li     __builtin_add_overflow(a, _d, &_e);
471*a58d3d2aSXin Li     return _e;
472*a58d3d2aSXin Li }
473*a58d3d2aSXin Li 
/* a32 + ((opus_int16)b32 * (opus_int16)c32): both factors are truncated to 16
   bits and multiplied as 32-bit values; the accumulation uses the wrapping
   silk_ADD32_ovflw(). */
474*a58d3d2aSXin Li #define silk_SMLABB_ovflw(a32, b32, c32)    (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
475*a58d3d2aSXin Li 
/* Plain integer divisions with the quotient cast to opus_int32. The divisor
   is not checked for zero. */
476*a58d3d2aSXin Li #define silk_DIV32_16(a32, b16)             ((opus_int32)((a32) / (b16)))
477*a58d3d2aSXin Li #define silk_DIV32(a32, b32)                ((opus_int32)((a32) / (b32)))
478*a58d3d2aSXin Li 
479*a58d3d2aSXin Li /* These macros enable checking for overflow in silk_API_Debug.h. Here they
   are plain + and - with no checking of their own. */
480*a58d3d2aSXin Li #define silk_ADD16(a, b)                    ((a) + (b))
481*a58d3d2aSXin Li #define silk_ADD32(a, b)                    ((a) + (b))
482*a58d3d2aSXin Li #define silk_ADD64(a, b)                    ((a) + (b))
483*a58d3d2aSXin Li 
484*a58d3d2aSXin Li #define silk_SUB16(a, b)                    ((a) - (b))
485*a58d3d2aSXin Li #define silk_SUB32(a, b)                    ((a) - (b))
486*a58d3d2aSXin Li #define silk_SUB64(a, b)                    ((a) - (b))
487*a58d3d2aSXin Li 
/* Clamp a to [silk_intN_MIN, silk_intN_MAX] for the named width. The argument
   may be evaluated more than once, so it must be free of side effects. */
488*a58d3d2aSXin Li #define silk_SAT8(a)                        ((a) > silk_int8_MAX ? silk_int8_MAX  :       \
489*a58d3d2aSXin Li                                             ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)))
490*a58d3d2aSXin Li #define silk_SAT16(a)                       ((a) > silk_int16_MAX ? silk_int16_MAX :      \
491*a58d3d2aSXin Li                                             ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
492*a58d3d2aSXin Li #define silk_SAT32(a)                       ((a) > silk_int32_MAX ? silk_int32_MAX :      \
493*a58d3d2aSXin Li                                             ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
494*a58d3d2aSXin Li 
/* Pass-through definitions: each expands to its argument unchanged. */
495*a58d3d2aSXin Li #define silk_CHECK_FIT8(a)                  (a)
496*a58d3d2aSXin Li #define silk_CHECK_FIT16(a)                 (a)
497*a58d3d2aSXin Li #define silk_CHECK_FIT32(a)                 (a)
498*a58d3d2aSXin Li 
/* 16-bit saturating add: the sum is formed in 32 bits through silk_ADD32,
   clamped by silk_SAT16 and narrowed to opus_int16. */
#define silk_ADD_SAT16(a, b)                ((opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) ))
/* 64-bit saturating add. The sign bit of the wrapped sum is probed on
   unsigned operands, so the overflow test itself never performs a signed
   overflow; the signed (a)+(b) is only evaluated in the branches where it
   cannot overflow. Arguments are evaluated more than once and must be free
   of side effects. */
#define silk_ADD_SAT64(a, b)                ((((opus_uint64)(a) + (opus_uint64)(b)) & 0x8000000000000000LL) == 0 ?   \
                                            ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
                                            ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )

/* 16-bit saturating subtract: the difference is formed in 32 bits through
   silk_SUB32, clamped by silk_SAT16 and narrowed to opus_int16. */
#define silk_SUB_SAT16(a, b)                ((opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) ))
/* 64-bit saturating subtract. As for silk_ADD_SAT64, the sign bit of the
   wrapped difference is probed on unsigned operands and the signed (a)-(b)
   is only evaluated in the branches where it cannot overflow. Arguments are
   evaluated more than once and must be free of side effects. */
#define silk_SUB_SAT64(a, b)                ((((opus_uint64)(a) - (opus_uint64)(b)) & 0x8000000000000000LL) == 0 ?                    \
                                            (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
                                            ((((a)^0x8000000000000000LL) & (b)  & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
508*a58d3d2aSXin Li 
509*a58d3d2aSXin Li /* Saturation for positive input values: only the upper bound silk_int32_MAX is applied */
510*a58d3d2aSXin Li #define silk_POS_SAT32(a)                   ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
511*a58d3d2aSXin Li 
512*a58d3d2aSXin Li /* Add with saturation for positive input values. The 8- and 16-bit forms test
   bit 7 / bit 15 of the sum and return the type's maximum when it is set;
   their arguments are evaluated more than once. */
513*a58d3d2aSXin Li #define silk_ADD_POS_SAT8(a, b)             ((((a)+(b)) & 0x80)                 ? silk_int8_MAX  : ((a)+(b)))
514*a58d3d2aSXin Li #define silk_ADD_POS_SAT16(a, b)            ((((a)+(b)) & 0x8000)               ? silk_int16_MAX : ((a)+(b)))
silk_ADD_POS_SAT32(opus_int32 a,opus_int32 b)515*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int32 a, opus_int32 b) {
516*a58d3d2aSXin Li     opus_int32  _c;
517*a58d3d2aSXin Li     if (__builtin_add_overflow(a, b, &_c))
518*a58d3d2aSXin Li         return silk_int32_MAX;
519*a58d3d2aSXin Li     return _c;
520*a58d3d2aSXin Li }
521*a58d3d2aSXin Li 
/* Left shifts: the operand is cast to the unsigned type of the named width,
   shifted, and cast back to the signed type, so the shift itself is never
   applied to a negative signed value. */
522*a58d3d2aSXin Li #define silk_LSHIFT8(a, shift)              ((opus_int8)((opus_uint8)(a)<<(shift)))         /* shift >= 0, shift < 8  */
523*a58d3d2aSXin Li #define silk_LSHIFT16(a, shift)             ((opus_int16)((opus_uint16)(a)<<(shift)))       /* shift >= 0, shift < 16 */
524*a58d3d2aSXin Li #define silk_LSHIFT32(a, shift)             ((opus_int32)((opus_uint32)(a)<<(shift)))       /* shift >= 0, shift < 32 */
525*a58d3d2aSXin Li #define silk_LSHIFT64(a, shift)             ((opus_int64)((opus_uint64)(a)<<(shift)))       /* shift >= 0, shift < 64 */
526*a58d3d2aSXin Li #define silk_LSHIFT(a, shift)               silk_LSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
527*a58d3d2aSXin Li 
/* Right shifts: all widths expand to the same plain >> on the operand's own
   type; the names only document the intended width. */
528*a58d3d2aSXin Li #define silk_RSHIFT8(a, shift)              ((a)>>(shift))                                  /* shift >= 0, shift < 8  */
529*a58d3d2aSXin Li #define silk_RSHIFT16(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 16 */
530*a58d3d2aSXin Li #define silk_RSHIFT32(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 32 */
531*a58d3d2aSXin Li #define silk_RSHIFT64(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 64 */
532*a58d3d2aSXin Li #define silk_RSHIFT(a, shift)               silk_RSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
533*a58d3d2aSXin Li 
534*a58d3d2aSXin Li /* Saturates before shifting: a is first limited to
   [silk_int32_MIN >> shift, silk_int32_MAX >> shift] with silk_LIMIT, then
   shifted left with silk_LSHIFT32. */
535*a58d3d2aSXin Li #define silk_LSHIFT_SAT32(a, shift)         (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
536*a58d3d2aSXin Li                                                     silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
537*a58d3d2aSXin Li 
/* silk_LSHIFT_ovflw has the same expansion as silk_LSHIFT32. The _uint forms
   shift the operand as given, without any cast. */
538*a58d3d2aSXin Li #define silk_LSHIFT_ovflw(a, shift)         ((opus_int32)((opus_uint32)(a) << (shift)))     /* shift >= 0, allowed to overflow */
539*a58d3d2aSXin Li #define silk_LSHIFT_uint(a, shift)          ((a) << (shift))                                /* shift >= 0 */
540*a58d3d2aSXin Li #define silk_RSHIFT_uint(a, shift)          ((a) >> (shift))                                /* shift >= 0 */
541*a58d3d2aSXin Li 
/* Combined forms: a plus/minus the shifted b. The *32 variants route the
   addition or subtraction through silk_ADD32 / silk_SUB32; the others use a
   plain +. */
542*a58d3d2aSXin Li #define silk_ADD_LSHIFT(a, b, shift)        ((a) + silk_LSHIFT((b), (shift)))               /* shift >= 0 */
543*a58d3d2aSXin Li #define silk_ADD_LSHIFT32(a, b, shift)      silk_ADD32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
544*a58d3d2aSXin Li #define silk_ADD_LSHIFT_uint(a, b, shift)   ((a) + silk_LSHIFT_uint((b), (shift)))          /* shift >= 0 */
545*a58d3d2aSXin Li #define silk_ADD_RSHIFT(a, b, shift)        ((a) + silk_RSHIFT((b), (shift)))               /* shift >= 0 */
546*a58d3d2aSXin Li #define silk_ADD_RSHIFT32(a, b, shift)      silk_ADD32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
547*a58d3d2aSXin Li #define silk_ADD_RSHIFT_uint(a, b, shift)   ((a) + silk_RSHIFT_uint((b), (shift)))          /* shift >= 0 */
548*a58d3d2aSXin Li #define silk_SUB_LSHIFT32(a, b, shift)      silk_SUB32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
549*a58d3d2aSXin Li #define silk_SUB_RSHIFT32(a, b, shift)      silk_SUB32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
550*a58d3d2aSXin Li 
551*a58d3d2aSXin Li /* Rounding right shift; requires that shift > 0. For shift == 1 the dropped
   bit is added back to the shifted value; otherwise the value is shifted by
   (shift - 1), incremented, and shifted once more. Both arguments are
   evaluated more than once. */
552*a58d3d2aSXin Li #define silk_RSHIFT_ROUND(a, shift)         ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
553*a58d3d2aSXin Li #define silk_RSHIFT_ROUND64(a, shift)       ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
554*a58d3d2aSXin Li 
555*a58d3d2aSXin Li /* Number of right shifts required to fit the multiplication, derived from the
   leading-zero counts of |a| and |b| */
556*a58d3d2aSXin Li #define silk_NSHIFT_MUL_32_32(a, b)         ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
557*a58d3d2aSXin Li #define silk_NSHIFT_MUL_16_16(a, b)         ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
558*a58d3d2aSXin Li 
559*a58d3d2aSXin Li 
/* Generic min/max. Each argument may be evaluated twice, so avoid arguments
   with side effects. */
560*a58d3d2aSXin Li #define silk_min(a, b)                      (((a) < (b)) ? (a) : (b))
561*a58d3d2aSXin Li #define silk_max(a, b)                      (((a) > (b)) ? (a) : (b))
562*a58d3d2aSXin Li 
563*a58d3d2aSXin Li /* Macro to convert floating-point constants to fixed-point: C * 2^Q + 0.5,
   converted to opus_int32. The conversion truncates toward zero, so the +0.5
   rounds to nearest only for non-negative C. */
564*a58d3d2aSXin Li #define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
565*a58d3d2aSXin Li 
566*a58d3d2aSXin Li /* silk_min() versions with typecast in the function call */
silk_min_int(opus_int a,opus_int b)567*a58d3d2aSXin Li static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
568*a58d3d2aSXin Li {
569*a58d3d2aSXin Li     return (((a) < (b)) ? (a) : (b));
570*a58d3d2aSXin Li }
silk_min_16(opus_int16 a,opus_int16 b)571*a58d3d2aSXin Li static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
572*a58d3d2aSXin Li {
573*a58d3d2aSXin Li     return (((a) < (b)) ? (a) : (b));
574*a58d3d2aSXin Li }
silk_min_32(opus_int32 a,opus_int32 b)575*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
576*a58d3d2aSXin Li {
577*a58d3d2aSXin Li     return (((a) < (b)) ? (a) : (b));
578*a58d3d2aSXin Li }
silk_min_64(opus_int64 a,opus_int64 b)579*a58d3d2aSXin Li static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
580*a58d3d2aSXin Li {
581*a58d3d2aSXin Li     return (((a) < (b)) ? (a) : (b));
582*a58d3d2aSXin Li }
583*a58d3d2aSXin Li 
584*a58d3d2aSXin Li /* silk_min() versions with typecast in the function call */
silk_max_int(opus_int a,opus_int b)585*a58d3d2aSXin Li static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
586*a58d3d2aSXin Li {
587*a58d3d2aSXin Li     return (((a) > (b)) ? (a) : (b));
588*a58d3d2aSXin Li }
silk_max_16(opus_int16 a,opus_int16 b)589*a58d3d2aSXin Li static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
590*a58d3d2aSXin Li {
591*a58d3d2aSXin Li     return (((a) > (b)) ? (a) : (b));
592*a58d3d2aSXin Li }
silk_max_32(opus_int32 a,opus_int32 b)593*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
594*a58d3d2aSXin Li {
595*a58d3d2aSXin Li     return (((a) > (b)) ? (a) : (b));
596*a58d3d2aSXin Li }
silk_max_64(opus_int64 a,opus_int64 b)597*a58d3d2aSXin Li static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
598*a58d3d2aSXin Li {
599*a58d3d2aSXin Li     return (((a) > (b)) ? (a) : (b));
600*a58d3d2aSXin Li }
601*a58d3d2aSXin Li 
/* Clamp a to the closed interval spanned by limit1 and limit2; the limits may
   be given in either order. All three arguments may be evaluated more than
   once. */
602*a58d3d2aSXin Li #define silk_LIMIT( a, limit1, limit2)      ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
603*a58d3d2aSXin Li                                                                  : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
604*a58d3d2aSXin Li 
/* The typed names are aliases of the generic macro. */
605*a58d3d2aSXin Li #define silk_LIMIT_int                      silk_LIMIT
606*a58d3d2aSXin Li #define silk_LIMIT_16                       silk_LIMIT
607*a58d3d2aSXin Li #define silk_LIMIT_32                       silk_LIMIT
608*a58d3d2aSXin Li 
/* Absolute value. silk_abs and silk_abs_int64 use a comparison and negation;
   silk_abs_int and silk_abs_int32 are branch-free and build a sign mask by
   right-shifting a by (bit width - 1) (sizeof-derived and fixed at 31,
   respectively), then XOR and subtract that mask. */
609*a58d3d2aSXin Li #define silk_abs(a)                         (((a) >  0)  ? (a) : -(a))            /* Be careful, silk_abs returns a wrong result when the input equals silk_intXX_MIN */
610*a58d3d2aSXin Li #define silk_abs_int(a)                     (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
611*a58d3d2aSXin Li #define silk_abs_int32(a)                   (((a) ^ ((a) >> 31)) - ((a) >> 31))
612*a58d3d2aSXin Li #define silk_abs_int64(a)                   (((a) >  0)  ? (a) : -(a))
613*a58d3d2aSXin Li 
/* Sign of a: 1 for positive, -1 for negative, 0 for zero. */
614*a58d3d2aSXin Li #define silk_sign(a)                        ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))
615*a58d3d2aSXin Li 
616*a58d3d2aSXin Li /* PSEUDO-RANDOM GENERATOR                                                          */
617*a58d3d2aSXin Li /* Make sure to store the result as the seed for the next call (also in between     */
618*a58d3d2aSXin Li /* frames), otherwise result won't be random at all. When only using some of the    */
619*a58d3d2aSXin Li /* bits, take the most significant bits by right-shifting.                          */
/* Computes RAND_INCREMENT + seed * RAND_MULTIPLIER through silk_MLA_ovflw(), so    */
/* both the multiplication and the addition are allowed to wrap.                    */
620*a58d3d2aSXin Li #define RAND_MULTIPLIER                     196314165
621*a58d3d2aSXin Li #define RAND_INCREMENT                      907633515
622*a58d3d2aSXin Li #define silk_RAND(seed)                     (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
623*a58d3d2aSXin Li 
624*a58d3d2aSXin Li /*  Add some multiplication functions that can be easily mapped to ARM. */
625*a58d3d2aSXin Li 
626*a58d3d2aSXin Li /*    silk_SMMUL: Signed top word multiply.
627*a58d3d2aSXin Li           ARMv6        2 instruction cycles.
628*a58d3d2aSXin Li           ARMv3M+      3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
629*a58d3d2aSXin Li /*#define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
630*a58d3d2aSXin Li /* The following seems faster on x86: form the 64-bit product with silk_SMULL,
   shift it right by 32 bits and narrow the result to opus_int32. */
631*a58d3d2aSXin Li #define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
632*a58d3d2aSXin Li 
/* Default dispatch: unless OVERRIDE_silk_burg_modified is defined,
   silk_burg_modified() expands to a call of silk_burg_modified_c() with the
   same arguments, arch included. */
633*a58d3d2aSXin Li #if !defined(OVERRIDE_silk_burg_modified)
634*a58d3d2aSXin Li #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
635*a58d3d2aSXin Li     ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
636*a58d3d2aSXin Li #endif
637*a58d3d2aSXin Li 
/* Default dispatch: unless OVERRIDE_silk_inner_prod16 is defined,
   silk_inner_prod16() expands to silk_inner_prod16_c(). Here arch is
   evaluated and discarded, because silk_inner_prod16_c() takes no arch
   argument. */
638*a58d3d2aSXin Li #if !defined(OVERRIDE_silk_inner_prod16)
639*a58d3d2aSXin Li #define silk_inner_prod16(inVec1, inVec2, len, arch) \
640*a58d3d2aSXin Li     ((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
641*a58d3d2aSXin Li #endif
642*a58d3d2aSXin Li 
643*a58d3d2aSXin Li #include "Inlines.h"
644*a58d3d2aSXin Li #include "MacroCount.h"
645*a58d3d2aSXin Li #include "MacroDebug.h"
646*a58d3d2aSXin Li 
647*a58d3d2aSXin Li #ifdef OPUS_ARM_INLINE_ASM
648*a58d3d2aSXin Li #include "arm/SigProc_FIX_armv4.h"
649*a58d3d2aSXin Li #endif
650*a58d3d2aSXin Li 
651*a58d3d2aSXin Li #ifdef OPUS_ARM_INLINE_EDSP
652*a58d3d2aSXin Li #include "arm/SigProc_FIX_armv5e.h"
653*a58d3d2aSXin Li #endif
654*a58d3d2aSXin Li 
655*a58d3d2aSXin Li #if defined(MIPSr1_ASM)
656*a58d3d2aSXin Li #include "mips/sigproc_fix_mipsr1.h"
657*a58d3d2aSXin Li #endif
658*a58d3d2aSXin Li 
659*a58d3d2aSXin Li 
660*a58d3d2aSXin Li #ifdef  __cplusplus
661*a58d3d2aSXin Li }
662*a58d3d2aSXin Li #endif
663*a58d3d2aSXin Li 
664*a58d3d2aSXin Li #endif /* SILK_SIGPROC_FIX_H */
665