xref: /aosp_15_r20/external/libopus/silk/x86/x86_silk_map.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2014, Cisco Systems, INC
2*a58d3d2aSXin Li    Written by XiangMingZhu WeiZhou MinPeng YanWang
3*a58d3d2aSXin Li 
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #if defined(HAVE_CONFIG_H)
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include "celt/x86/x86cpu.h"
33*a58d3d2aSXin Li #include "structs.h"
34*a58d3d2aSXin Li #include "SigProc_FIX.h"
35*a58d3d2aSXin Li #ifndef FIXED_POINT
36*a58d3d2aSXin Li #include "SigProc_FLP.h"
37*a58d3d2aSXin Li #endif
38*a58d3d2aSXin Li #include "pitch.h"
39*a58d3d2aSXin Li #include "main.h"
40*a58d3d2aSXin Li 
41*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_AVX2)
42*a58d3d2aSXin Li 
43*a58d3d2aSXin Li #if defined(FIXED_POINT)
44*a58d3d2aSXin Li 
45*a58d3d2aSXin Li #include "fixed/main_FIX.h"
46*a58d3d2aSXin Li 
/* Table of silk_inner_prod16 implementations: an array of OPUS_ARCHMASK + 1
   const function pointers, compiled only when OPUS_HAVE_RTCD is defined,
   OPUS_X86_PRESUME_AVX2 is not, and FIXED_POINT is defined.
   The first three slots hold silk_inner_prod16_c; the last two listed slots
   hold whatever MAY_HAVE_SSE4_1( silk_inner_prod16 ) expands to (the macro is
   defined outside this file).
   Only five initializers are given; if the array is longer than that, the
   remaining elements are zero-initialized (null pointers) by the C rules for
   partially initialized objects with static storage duration.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
47*a58d3d2aSXin Li opus_int64 (*const SILK_INNER_PROD16_IMPL[ OPUS_ARCHMASK + 1 ] )(
48*a58d3d2aSXin Li     const opus_int16 *inVec1,
49*a58d3d2aSXin Li     const opus_int16 *inVec2,
50*a58d3d2aSXin Li     const opus_int   len
51*a58d3d2aSXin Li ) = {
52*a58d3d2aSXin Li   silk_inner_prod16_c,                  /* non-sse */
53*a58d3d2aSXin Li   silk_inner_prod16_c,
54*a58d3d2aSXin Li   silk_inner_prod16_c,
55*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_inner_prod16 ), /* sse4.1 */
56*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_inner_prod16 )  /* avx (same SSE4.1 selection as the slot above) */
57*a58d3d2aSXin Li };
58*a58d3d2aSXin Li 
59*a58d3d2aSXin Li #endif
60*a58d3d2aSXin Li 
/* Table of silk_VAD_GetSA_Q8 implementations: an array of OPUS_ARCHMASK + 1
   const function pointers, compiled only when OPUS_HAVE_RTCD is defined and
   OPUS_X86_PRESUME_AVX2 is not.
   The first three slots hold silk_VAD_GetSA_Q8_c; the last two listed slots
   hold whatever MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) expands to (the macro is
   defined outside this file).
   Only five initializers are given; any further elements are zero-initialized
   (null pointers) by the C static-initialization rules.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
61*a58d3d2aSXin Li opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )(
62*a58d3d2aSXin Li     silk_encoder_state *psEncC,
63*a58d3d2aSXin Li     const opus_int16   pIn[]
64*a58d3d2aSXin Li ) = {
65*a58d3d2aSXin Li   silk_VAD_GetSA_Q8_c,                  /* non-sse */
66*a58d3d2aSXin Li   silk_VAD_GetSA_Q8_c,
67*a58d3d2aSXin Li   silk_VAD_GetSA_Q8_c,
68*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */
69*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 )  /* avx (same SSE4.1 selection as the slot above) */
70*a58d3d2aSXin Li };
71*a58d3d2aSXin Li 
/* Table of silk_NSQ implementations: an array of OPUS_ARCHMASK + 1 const
   function pointers, compiled only when OPUS_HAVE_RTCD is defined and
   OPUS_X86_PRESUME_AVX2 is not.
   The first three slots hold silk_NSQ_c; the last two listed slots hold
   whatever MAY_HAVE_SSE4_1( silk_NSQ ) expands to (the macro is defined
   outside this file).
   Only five initializers are given; any further elements are zero-initialized
   (null pointers) by the C static-initialization rules.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
72*a58d3d2aSXin Li void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
73*a58d3d2aSXin Li     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
74*a58d3d2aSXin Li     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
75*a58d3d2aSXin Li     SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
76*a58d3d2aSXin Li     const opus_int16            x16[],                                        /* I    Input                           */
77*a58d3d2aSXin Li     opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
78*a58d3d2aSXin Li     const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
79*a58d3d2aSXin Li     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
80*a58d3d2aSXin Li     const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
81*a58d3d2aSXin Li     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
82*a58d3d2aSXin Li     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
83*a58d3d2aSXin Li     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
84*a58d3d2aSXin Li     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
85*a58d3d2aSXin Li     const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
86*a58d3d2aSXin Li     const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
87*a58d3d2aSXin Li     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
88*a58d3d2aSXin Li ) = {
89*a58d3d2aSXin Li   silk_NSQ_c,                  /* non-sse */
90*a58d3d2aSXin Li   silk_NSQ_c,
91*a58d3d2aSXin Li   silk_NSQ_c,
92*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */
93*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_NSQ )  /* avx (same SSE4.1 selection as the slot above) */
94*a58d3d2aSXin Li };
95*a58d3d2aSXin Li 
/* Table of silk_VQ_WMat_EC implementations: an array of OPUS_ARCHMASK + 1
   const function pointers, compiled only when OPUS_HAVE_RTCD is defined and
   OPUS_X86_PRESUME_AVX2 is not.
   The first three slots hold silk_VQ_WMat_EC_c; the last two listed slots
   hold whatever MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) expands to (the macro is
   defined outside this file).
   Only five initializers are given; any further elements are zero-initialized
   (null pointers) by the C static-initialization rules.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
96*a58d3d2aSXin Li void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
97*a58d3d2aSXin Li     opus_int8                   *ind,                           /* O    index of best codebook vector               */
98*a58d3d2aSXin Li     opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
99*a58d3d2aSXin Li     opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
100*a58d3d2aSXin Li     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
101*a58d3d2aSXin Li     const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
102*a58d3d2aSXin Li     const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
103*a58d3d2aSXin Li     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
104*a58d3d2aSXin Li     const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
105*a58d3d2aSXin Li     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
106*a58d3d2aSXin Li     const opus_int              subfr_len,                      /* I    number of samples per subframe              */
107*a58d3d2aSXin Li     const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
108*a58d3d2aSXin Li     const opus_int              L                               /* I    number of vectors in codebook               */
109*a58d3d2aSXin Li ) = {
110*a58d3d2aSXin Li   silk_VQ_WMat_EC_c,                  /* non-sse */
111*a58d3d2aSXin Li   silk_VQ_WMat_EC_c,
112*a58d3d2aSXin Li   silk_VQ_WMat_EC_c,
113*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */
114*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_VQ_WMat_EC )  /* avx (same SSE4.1 selection as the slot above) */
115*a58d3d2aSXin Li };
116*a58d3d2aSXin Li 
/* Table of silk_NSQ_del_dec implementations: an array of OPUS_ARCHMASK + 1
   const function pointers, compiled only when OPUS_HAVE_RTCD is defined and
   OPUS_X86_PRESUME_AVX2 is not.
   The first three slots hold silk_NSQ_del_dec_c; the fourth holds whatever
   MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) expands to, and the fifth whatever
   MAY_HAVE_AVX2( silk_NSQ_del_dec ) expands to (both macros are defined
   outside this file). Unlike the other tables in this file that use
   MAY_HAVE_SSE4_1 for both of the last two slots, this one selects through
   MAY_HAVE_AVX2 for the final slot.
   Only five initializers are given; any further elements are zero-initialized
   (null pointers) by the C static-initialization rules.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
117*a58d3d2aSXin Li void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
118*a58d3d2aSXin Li     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
119*a58d3d2aSXin Li     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
120*a58d3d2aSXin Li     SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
121*a58d3d2aSXin Li     const opus_int16            x16[],                                        /* I    Input                           */
122*a58d3d2aSXin Li     opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
123*a58d3d2aSXin Li     const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
124*a58d3d2aSXin Li     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
125*a58d3d2aSXin Li     const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
126*a58d3d2aSXin Li     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
127*a58d3d2aSXin Li     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
128*a58d3d2aSXin Li     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
129*a58d3d2aSXin Li     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
130*a58d3d2aSXin Li     const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
131*a58d3d2aSXin Li     const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
132*a58d3d2aSXin Li     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
133*a58d3d2aSXin Li ) = {
134*a58d3d2aSXin Li   silk_NSQ_del_dec_c,                  /* non-sse */
135*a58d3d2aSXin Li   silk_NSQ_del_dec_c,
136*a58d3d2aSXin Li   silk_NSQ_del_dec_c,
137*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */
138*a58d3d2aSXin Li   MAY_HAVE_AVX2( silk_NSQ_del_dec )  /* avx slot; selected via MAY_HAVE_AVX2 */
139*a58d3d2aSXin Li };
140*a58d3d2aSXin Li 
141*a58d3d2aSXin Li #if defined(FIXED_POINT)
142*a58d3d2aSXin Li 
/* Table of silk_burg_modified implementations: an array of OPUS_ARCHMASK + 1
   const function pointers, compiled only when OPUS_HAVE_RTCD is defined,
   OPUS_X86_PRESUME_AVX2 is not, and FIXED_POINT is defined.
   The first three slots hold silk_burg_modified_c; the last two listed slots
   hold whatever MAY_HAVE_SSE4_1( silk_burg_modified ) expands to (the macro
   is defined outside this file).
   Only five initializers are given; any further elements are zero-initialized
   (null pointers) by the C static-initialization rules.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
143*a58d3d2aSXin Li void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
144*a58d3d2aSXin Li     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
145*a58d3d2aSXin Li     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
146*a58d3d2aSXin Li     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
147*a58d3d2aSXin Li     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
148*a58d3d2aSXin Li     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
149*a58d3d2aSXin Li     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
150*a58d3d2aSXin Li     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
151*a58d3d2aSXin Li     const opus_int              D,                  /* I    Order                                                       */
152*a58d3d2aSXin Li     int                         arch                /* I    Run-time architecture                                       */
153*a58d3d2aSXin Li ) = {
154*a58d3d2aSXin Li   silk_burg_modified_c,                  /* non-sse */
155*a58d3d2aSXin Li   silk_burg_modified_c,
156*a58d3d2aSXin Li   silk_burg_modified_c,
157*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */
158*a58d3d2aSXin Li   MAY_HAVE_SSE4_1( silk_burg_modified )  /* avx (same SSE4.1 selection as the slot above) */
159*a58d3d2aSXin Li };
160*a58d3d2aSXin Li 
161*a58d3d2aSXin Li #endif
162*a58d3d2aSXin Li 
163*a58d3d2aSXin Li #ifndef FIXED_POINT
164*a58d3d2aSXin Li 
/* Table of silk_inner_product_FLP implementations: an array of
   OPUS_ARCHMASK + 1 const function pointers, compiled only when
   OPUS_HAVE_RTCD is defined, OPUS_X86_PRESUME_AVX2 is not, and FIXED_POINT
   is not defined.
   The first four slots all hold silk_inner_product_FLP_c (including the slot
   labelled sse4.1); the fifth holds whatever
   MAY_HAVE_AVX2( silk_inner_product_FLP ) expands to (the macro is defined
   outside this file).
   Only five initializers are given; any further elements are zero-initialized
   (null pointers) by the C static-initialization rules.
   NOTE(review): presumably indexed by the run-time arch value -- confirm
   against the header that declares this table. */
165*a58d3d2aSXin Li double (*const SILK_INNER_PRODUCT_FLP_IMPL[ OPUS_ARCHMASK + 1 ] )(
166*a58d3d2aSXin Li     const silk_float    *data1,
167*a58d3d2aSXin Li     const silk_float    *data2,
168*a58d3d2aSXin Li     opus_int            dataSize
169*a58d3d2aSXin Li ) = {
170*a58d3d2aSXin Li   silk_inner_product_FLP_c,                  /* non-sse */
171*a58d3d2aSXin Li   silk_inner_product_FLP_c,
172*a58d3d2aSXin Li   silk_inner_product_FLP_c,
173*a58d3d2aSXin Li   silk_inner_product_FLP_c, /* sse4.1 slot: the `_c` function is used directly here */
174*a58d3d2aSXin Li   MAY_HAVE_AVX2( silk_inner_product_FLP )  /* avx slot; selected via MAY_HAVE_AVX2 */
175*a58d3d2aSXin Li };
176*a58d3d2aSXin Li 
177*a58d3d2aSXin Li #endif
178*a58d3d2aSXin Li 
179*a58d3d2aSXin Li #endif
180