/* xref: /aosp_15_r20/external/libopus/silk/x86/main_sse.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc) */
/* Copyright (c) 2014, Cisco Systems, INC
   Written by XiangMingZhu WeiZhou MinPeng YanWang

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef MAIN_SSE_H
# define MAIN_SSE_H

# ifdef HAVE_CONFIG_H
#  include "config.h"
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1)

37*a58d3d2aSXin Li void silk_VQ_WMat_EC_sse4_1(
38*a58d3d2aSXin Li     opus_int8                   *ind,                           /* O    index of best codebook vector               */
39*a58d3d2aSXin Li     opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
40*a58d3d2aSXin Li     opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
41*a58d3d2aSXin Li     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
42*a58d3d2aSXin Li     const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
43*a58d3d2aSXin Li     const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
44*a58d3d2aSXin Li     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
45*a58d3d2aSXin Li     const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
46*a58d3d2aSXin Li     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
47*a58d3d2aSXin Li     const opus_int              subfr_len,                      /* I    number of samples per subframe              */
48*a58d3d2aSXin Li     const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
49*a58d3d2aSXin Li     const opus_int              L                               /* I    number of vectors in codebook               */
50*a58d3d2aSXin Li );
51*a58d3d2aSXin Li 
/* Compile-time selection of silk_VQ_WMat_EC():
 *  - OPUS_X86_PRESUME_SSE4_1: call silk_VQ_WMat_EC_sse4_1() directly; arch is
 *    evaluated once and discarded.
 *  - otherwise, with OPUS_HAVE_RTCD: call through the SILK_VQ_WMAT_EC_IMPL
 *    table (OPUS_ARCHMASK + 1 entries), indexed by (arch) & OPUS_ARCHMASK.
 * When neither macro is defined, this block defines nothing and
 * OVERRIDE_silk_VQ_WMat_EC stays undefined.
 */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_VQ_WMat_EC
#   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                           subfr_len, max_gain_Q7, L, arch) \
    ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                          subfr_len, max_gain_Q7, L))

#  elif defined(OPUS_HAVE_RTCD)

/* Per-architecture implementations; the table is defined elsewhere. */
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
    opus_int8                   *ind,                           /* O    index of best codebook vector               */
    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
    const opus_int              L                               /* I    number of vectors in codebook               */
);

#   define OVERRIDE_silk_VQ_WMat_EC
#   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                           subfr_len, max_gain_Q7, L, arch) \
    ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                          subfr_len, max_gain_Q7, L))

#  endif

85*a58d3d2aSXin Li void silk_NSQ_sse4_1(
86*a58d3d2aSXin Li     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
87*a58d3d2aSXin Li     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
88*a58d3d2aSXin Li     SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
89*a58d3d2aSXin Li     const opus_int16            x16[],                                        /* I    Input                           */
90*a58d3d2aSXin Li     opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
91*a58d3d2aSXin Li     const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
92*a58d3d2aSXin Li     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
93*a58d3d2aSXin Li     const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
94*a58d3d2aSXin Li     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
95*a58d3d2aSXin Li     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
96*a58d3d2aSXin Li     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
97*a58d3d2aSXin Li     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
98*a58d3d2aSXin Li     const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
99*a58d3d2aSXin Li     const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
100*a58d3d2aSXin Li     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
101*a58d3d2aSXin Li );
102*a58d3d2aSXin Li 
/* Compile-time selection of silk_NSQ():
 *  - OPUS_X86_PRESUME_SSE4_1: call silk_NSQ_sse4_1() directly; arch is
 *    evaluated once and discarded.
 *  - otherwise, with OPUS_HAVE_RTCD: call through the SILK_NSQ_IMPL table
 *    (OPUS_ARCHMASK + 1 entries), indexed by (arch) & OPUS_ARCHMASK.
 * When neither macro is defined, this block defines nothing and
 * OVERRIDE_silk_NSQ stays undefined.
 * Macro parameter names follow the prototypes (x16, AR_Q13).
 */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_NSQ
#   define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  elif defined(OPUS_HAVE_RTCD)

/* Per-architecture implementations; the table is defined elsewhere. */
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    const opus_int16            x16[],                                        /* I    Input                           */
    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
);

#   define OVERRIDE_silk_NSQ
#   define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  endif

139*a58d3d2aSXin Li void silk_NSQ_del_dec_sse4_1(
140*a58d3d2aSXin Li     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
141*a58d3d2aSXin Li     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
142*a58d3d2aSXin Li     SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
143*a58d3d2aSXin Li     const opus_int16            x16[],                                        /* I    Input                           */
144*a58d3d2aSXin Li     opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
145*a58d3d2aSXin Li     const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
146*a58d3d2aSXin Li     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
147*a58d3d2aSXin Li     const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
148*a58d3d2aSXin Li     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
149*a58d3d2aSXin Li     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
150*a58d3d2aSXin Li     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
151*a58d3d2aSXin Li     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
152*a58d3d2aSXin Li     const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
153*a58d3d2aSXin Li     const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
154*a58d3d2aSXin Li     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
155*a58d3d2aSXin Li );
156*a58d3d2aSXin Li 
157*a58d3d2aSXin Li void silk_NSQ_del_dec_avx2(
158*a58d3d2aSXin Li     const silk_encoder_state *psEncC,                            /* I    Encoder State               */
159*a58d3d2aSXin Li     silk_nsq_state *NSQ,                                         /* I/O  NSQ state                   */
160*a58d3d2aSXin Li     SideInfoIndices *psIndices,                                  /* I/O  Quantization Indices        */
161*a58d3d2aSXin Li     const opus_int16 x16[],                                      /* I    Input                       */
162*a58d3d2aSXin Li     opus_int8 pulses[],                                          /* O    Quantized pulse signal      */
163*a58d3d2aSXin Li     const opus_int16 *PredCoef_Q12,                              /* I    Short term prediction coefs */
164*a58d3d2aSXin Li     const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],      /* I    Long term prediction coefs  */
165*a58d3d2aSXin Li     const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER], /* I    Noise shaping coefs         */
166*a58d3d2aSXin Li     const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],              /* I    Long term shaping coefs     */
167*a58d3d2aSXin Li     const opus_int Tilt_Q14[MAX_NB_SUBFR],                       /* I    Spectral tilt               */
168*a58d3d2aSXin Li     const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],                   /* I    Low frequency shaping coefs */
169*a58d3d2aSXin Li     const opus_int32 Gains_Q16[MAX_NB_SUBFR],                    /* I    Quantization step sizes     */
170*a58d3d2aSXin Li     const opus_int32 pitchL[MAX_NB_SUBFR],                       /* I    Pitch lags                  */
171*a58d3d2aSXin Li     const opus_int Lambda_Q10,                                   /* I    Rate/distortion tradeoff    */
172*a58d3d2aSXin Li     const opus_int LTP_scale_Q14                                 /* I    LTP state scaling           */
173*a58d3d2aSXin Li );
174*a58d3d2aSXin Li 
/* Compile-time selection of silk_NSQ_del_dec(), first match wins:
 *  1. OPUS_X86_PRESUME_AVX2: call silk_NSQ_del_dec_avx2() directly.
 *  2. OPUS_X86_PRESUME_SSE4_1 without OPUS_X86_MAY_HAVE_AVX2: call
 *     silk_NSQ_del_dec_sse4_1() directly.
 *  3. OPUS_HAVE_RTCD: call through the SILK_NSQ_DEL_DEC_IMPL table
 *     (OPUS_ARCHMASK + 1 entries), indexed by (arch) & OPUS_ARCHMASK.
 * In the two direct cases arch is evaluated once and discarded. When none of
 * the conditions hold, this block defines nothing and
 * OVERRIDE_silk_NSQ_del_dec stays undefined.
 */
#  if defined(OPUS_X86_PRESUME_AVX2)

#   define OVERRIDE_silk_NSQ_del_dec
#   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((void)(arch),silk_NSQ_del_dec_avx2(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)

#   define OVERRIDE_silk_NSQ_del_dec
#   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  elif defined(OPUS_HAVE_RTCD)

/* Per-architecture implementations; the table is defined elsewhere. */
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    const opus_int16            x16[],                                        /* I    Input                           */
    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
);

#   define OVERRIDE_silk_NSQ_del_dec
#   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  endif

219*a58d3d2aSXin Li void silk_noise_shape_quantizer(
220*a58d3d2aSXin Li     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
221*a58d3d2aSXin Li     opus_int            signalType,             /* I    Signal type                     */
222*a58d3d2aSXin Li     const opus_int32    x_sc_Q10[],             /* I                                    */
223*a58d3d2aSXin Li     opus_int8           pulses[],               /* O                                    */
224*a58d3d2aSXin Li     opus_int16          xq[],                   /* O                                    */
225*a58d3d2aSXin Li     opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
226*a58d3d2aSXin Li     const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
227*a58d3d2aSXin Li     const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
228*a58d3d2aSXin Li     const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
229*a58d3d2aSXin Li     opus_int            lag,                    /* I    Pitch lag                       */
230*a58d3d2aSXin Li     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
231*a58d3d2aSXin Li     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
232*a58d3d2aSXin Li     opus_int32          LF_shp_Q14,             /* I                                    */
233*a58d3d2aSXin Li     opus_int32          Gain_Q16,               /* I                                    */
234*a58d3d2aSXin Li     opus_int            Lambda_Q10,             /* I                                    */
235*a58d3d2aSXin Li     opus_int            offset_Q10,             /* I                                    */
236*a58d3d2aSXin Li     opus_int            length,                 /* I    Input length                    */
237*a58d3d2aSXin Li     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
238*a58d3d2aSXin Li     opus_int            predictLPCOrder,        /* I    Prediction filter order         */
239*a58d3d2aSXin Li     int                 arch                    /* I    Architecture                    */
240*a58d3d2aSXin Li );
241*a58d3d2aSXin Li 
242*a58d3d2aSXin Li /**************************/
243*a58d3d2aSXin Li /* Noise level estimation */
244*a58d3d2aSXin Li /**************************/
245*a58d3d2aSXin Li void silk_VAD_GetNoiseLevels(
246*a58d3d2aSXin Li     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
247*a58d3d2aSXin Li     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
248*a58d3d2aSXin Li );
249*a58d3d2aSXin Li 
250*a58d3d2aSXin Li opus_int silk_VAD_GetSA_Q8_sse4_1(
251*a58d3d2aSXin Li     silk_encoder_state *psEnC,
252*a58d3d2aSXin Li     const opus_int16   pIn[]
253*a58d3d2aSXin Li );
254*a58d3d2aSXin Li 
/* Compile-time selection of silk_VAD_GetSA_Q8():
 *  - OPUS_X86_PRESUME_SSE4_1: call silk_VAD_GetSA_Q8_sse4_1() directly; arch
 *    is evaluated once and discarded.
 *  - otherwise, with OPUS_HAVE_RTCD: call through the SILK_VAD_GETSA_Q8_IMPL
 *    table (OPUS_ARCHMASK + 1 entries), indexed by (arch) & OPUS_ARCHMASK.
 * When neither macro is defined, this block defines nothing and
 * OVERRIDE_silk_VAD_GetSA_Q8 stays undefined.
 */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_VAD_GetSA_Q8
#   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))

#  elif defined(OPUS_HAVE_RTCD)

/* Per-architecture implementations; the table is defined elsewhere. */
extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
     silk_encoder_state *psEnC,
     const opus_int16   pIn[]);

#   define OVERRIDE_silk_VAD_GetSA_Q8
#   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
      ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))

#  endif

272*a58d3d2aSXin Li #ifndef FIXED_POINT
273*a58d3d2aSXin Li double silk_inner_product_FLP_avx2(
274*a58d3d2aSXin Li     const silk_float    *data1,
275*a58d3d2aSXin Li     const silk_float    *data2,
276*a58d3d2aSXin Li     opus_int            dataSize
277*a58d3d2aSXin Li );
278*a58d3d2aSXin Li 
279*a58d3d2aSXin Li #if defined (OPUS_X86_PRESUME_AVX2)
280*a58d3d2aSXin Li 
281*a58d3d2aSXin Li #define OVERRIDE_inner_product_FLP
282*a58d3d2aSXin Li #define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)arch,silk_inner_product_FLP_avx2(data1, data2, dataSize))
283*a58d3d2aSXin Li 
284*a58d3d2aSXin Li #elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
285*a58d3d2aSXin Li 
286*a58d3d2aSXin Li #define OVERRIDE_inner_product_FLP
287*a58d3d2aSXin Li extern double (*const SILK_INNER_PRODUCT_FLP_IMPL[OPUS_ARCHMASK + 1])(
288*a58d3d2aSXin Li     const silk_float    *data1,
289*a58d3d2aSXin Li     const silk_float    *data2,
290*a58d3d2aSXin Li     opus_int            dataSize
291*a58d3d2aSXin Li );
292*a58d3d2aSXin Li 
293*a58d3d2aSXin Li #define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)arch,(*SILK_INNER_PRODUCT_FLP_IMPL[(arch) & OPUS_ARCHMASK])(data1, data2, dataSize))
294*a58d3d2aSXin Li 
295*a58d3d2aSXin Li #endif
296*a58d3d2aSXin Li #endif
297*a58d3d2aSXin Li 
# endif /* OPUS_X86_MAY_HAVE_SSE4_1 */
#endif /* MAIN_SSE_H */