1*a58d3d2aSXin Li /* Copyright (c) 2014, Cisco Systems, INC 2*a58d3d2aSXin Li Written by XiangMingZhu WeiZhou MinPeng YanWang 3*a58d3d2aSXin Li 4*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without 5*a58d3d2aSXin Li modification, are permitted provided that the following conditions 6*a58d3d2aSXin Li are met: 7*a58d3d2aSXin Li 8*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright 9*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer. 10*a58d3d2aSXin Li 11*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright 12*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the 13*a58d3d2aSXin Li documentation and/or other materials provided with the distribution. 14*a58d3d2aSXin Li 15*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19*a58d3d2aSXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26*a58d3d2aSXin Li */ 27*a58d3d2aSXin Li 28*a58d3d2aSXin Li #if defined(HAVE_CONFIG_H) 29*a58d3d2aSXin Li #include "config.h" 30*a58d3d2aSXin Li #endif 31*a58d3d2aSXin Li 32*a58d3d2aSXin Li #include "celt/x86/x86cpu.h" 33*a58d3d2aSXin Li #include "structs.h" 34*a58d3d2aSXin Li #include "SigProc_FIX.h" 35*a58d3d2aSXin Li #ifndef FIXED_POINT 36*a58d3d2aSXin Li #include "SigProc_FLP.h" 37*a58d3d2aSXin Li #endif 38*a58d3d2aSXin Li #include "pitch.h" 39*a58d3d2aSXin Li #include "main.h" 40*a58d3d2aSXin Li 41*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_AVX2) 42*a58d3d2aSXin Li 43*a58d3d2aSXin Li #if defined(FIXED_POINT) 44*a58d3d2aSXin Li 45*a58d3d2aSXin Li #include "fixed/main_FIX.h" 46*a58d3d2aSXin Li 47*a58d3d2aSXin Li opus_int64 (*const SILK_INNER_PROD16_IMPL[ OPUS_ARCHMASK + 1 ] )( 48*a58d3d2aSXin Li const opus_int16 *inVec1, 49*a58d3d2aSXin Li const opus_int16 *inVec2, 50*a58d3d2aSXin Li const opus_int len 51*a58d3d2aSXin Li ) = { 52*a58d3d2aSXin Li silk_inner_prod16_c, /* non-sse */ 53*a58d3d2aSXin Li silk_inner_prod16_c, 54*a58d3d2aSXin Li silk_inner_prod16_c, 55*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_inner_prod16 ), /* sse4.1 */ 56*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_inner_prod16 ) /* avx */ 57*a58d3d2aSXin Li }; 58*a58d3d2aSXin Li 59*a58d3d2aSXin Li #endif 60*a58d3d2aSXin Li 61*a58d3d2aSXin Li opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( 62*a58d3d2aSXin Li silk_encoder_state *psEncC, 63*a58d3d2aSXin Li const opus_int16 pIn[] 64*a58d3d2aSXin Li ) = { 65*a58d3d2aSXin Li silk_VAD_GetSA_Q8_c, /* non-sse */ 66*a58d3d2aSXin Li silk_VAD_GetSA_Q8_c, 67*a58d3d2aSXin Li silk_VAD_GetSA_Q8_c, 68*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */ 69*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ 70*a58d3d2aSXin Li }; 71*a58d3d2aSXin Li 72*a58d3d2aSXin Li void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( 73*a58d3d2aSXin Li const silk_encoder_state *psEncC, /* I Encoder State */ 74*a58d3d2aSXin Li silk_nsq_state *NSQ, /* I/O NSQ state */ 75*a58d3d2aSXin Li SideInfoIndices *psIndices, /* I/O Quantization Indices */ 76*a58d3d2aSXin Li const opus_int16 x16[], /* I Input */ 77*a58d3d2aSXin Li opus_int8 pulses[], /* O Quantized pulse signal */ 78*a58d3d2aSXin Li const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ 79*a58d3d2aSXin Li const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ 80*a58d3d2aSXin Li const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ 81*a58d3d2aSXin Li const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ 82*a58d3d2aSXin Li const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ 83*a58d3d2aSXin Li const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ 84*a58d3d2aSXin Li const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ 85*a58d3d2aSXin Li const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ 86*a58d3d2aSXin Li const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ 87*a58d3d2aSXin Li const opus_int LTP_scale_Q14 /* I LTP state scaling */ 88*a58d3d2aSXin Li ) = { 89*a58d3d2aSXin Li silk_NSQ_c, /* non-sse */ 90*a58d3d2aSXin Li silk_NSQ_c, 91*a58d3d2aSXin Li silk_NSQ_c, 92*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ 93*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ 94*a58d3d2aSXin Li }; 95*a58d3d2aSXin Li 96*a58d3d2aSXin Li void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( 97*a58d3d2aSXin Li opus_int8 *ind, /* O index of best codebook vector */ 98*a58d3d2aSXin Li opus_int32 *res_nrg_Q15, /* O best residual energy */ 99*a58d3d2aSXin Li opus_int32 *rate_dist_Q8, /* O best total bitrate */ 100*a58d3d2aSXin Li opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ 101*a58d3d2aSXin Li const opus_int32 *XX_Q17, /* I correlation matrix */ 102*a58d3d2aSXin Li const opus_int32 *xX_Q17, /* I correlation vector */ 103*a58d3d2aSXin Li const opus_int8 *cb_Q7, /* I codebook */ 104*a58d3d2aSXin Li const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ 105*a58d3d2aSXin Li const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ 106*a58d3d2aSXin Li const opus_int subfr_len, /* I number of samples per subframe */ 107*a58d3d2aSXin Li const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ 108*a58d3d2aSXin Li const opus_int L /* I number of vectors in codebook */ 109*a58d3d2aSXin Li ) = { 110*a58d3d2aSXin Li silk_VQ_WMat_EC_c, /* non-sse */ 111*a58d3d2aSXin Li silk_VQ_WMat_EC_c, 112*a58d3d2aSXin Li silk_VQ_WMat_EC_c, 113*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ 114*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ 115*a58d3d2aSXin Li }; 116*a58d3d2aSXin Li 117*a58d3d2aSXin Li void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( 118*a58d3d2aSXin Li const silk_encoder_state *psEncC, /* I Encoder State */ 119*a58d3d2aSXin Li silk_nsq_state *NSQ, /* I/O NSQ state */ 120*a58d3d2aSXin Li SideInfoIndices *psIndices, /* I/O Quantization Indices */ 121*a58d3d2aSXin Li const opus_int16 x16[], /* I Input */ 122*a58d3d2aSXin Li opus_int8 pulses[], /* O Quantized pulse signal */ 123*a58d3d2aSXin Li const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ 124*a58d3d2aSXin Li const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ 125*a58d3d2aSXin Li const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ 126*a58d3d2aSXin Li const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ 127*a58d3d2aSXin Li const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ 128*a58d3d2aSXin Li const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ 129*a58d3d2aSXin Li const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ 130*a58d3d2aSXin Li const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ 131*a58d3d2aSXin Li const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ 132*a58d3d2aSXin Li const opus_int LTP_scale_Q14 /* I LTP state scaling */ 133*a58d3d2aSXin Li ) = { 134*a58d3d2aSXin Li silk_NSQ_del_dec_c, /* non-sse */ 135*a58d3d2aSXin Li silk_NSQ_del_dec_c, 136*a58d3d2aSXin Li silk_NSQ_del_dec_c, 137*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ 138*a58d3d2aSXin Li MAY_HAVE_AVX2( silk_NSQ_del_dec ) /* avx */ 139*a58d3d2aSXin Li }; 140*a58d3d2aSXin Li 141*a58d3d2aSXin Li #if defined(FIXED_POINT) 142*a58d3d2aSXin Li 143*a58d3d2aSXin Li void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( 144*a58d3d2aSXin Li opus_int32 *res_nrg, /* O Residual energy */ 145*a58d3d2aSXin Li opus_int *res_nrg_Q, /* O Residual energy Q value */ 146*a58d3d2aSXin Li opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ 147*a58d3d2aSXin Li const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ 148*a58d3d2aSXin Li const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ 149*a58d3d2aSXin Li const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ 150*a58d3d2aSXin Li const opus_int nb_subfr, /* I Number of subframes stacked in x */ 151*a58d3d2aSXin Li const opus_int D, /* I Order */ 152*a58d3d2aSXin Li int arch /* I Run-time architecture */ 153*a58d3d2aSXin Li ) = { 154*a58d3d2aSXin Li silk_burg_modified_c, /* non-sse */ 155*a58d3d2aSXin Li silk_burg_modified_c, 156*a58d3d2aSXin Li silk_burg_modified_c, 157*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */ 158*a58d3d2aSXin Li MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */ 159*a58d3d2aSXin Li }; 160*a58d3d2aSXin Li 161*a58d3d2aSXin Li #endif 162*a58d3d2aSXin Li 163*a58d3d2aSXin Li #ifndef FIXED_POINT 164*a58d3d2aSXin Li 165*a58d3d2aSXin Li double (*const SILK_INNER_PRODUCT_FLP_IMPL[ OPUS_ARCHMASK + 1 ] )( 166*a58d3d2aSXin Li const silk_float *data1, 167*a58d3d2aSXin Li const silk_float *data2, 168*a58d3d2aSXin Li opus_int dataSize 169*a58d3d2aSXin Li ) = { 170*a58d3d2aSXin Li silk_inner_product_FLP_c, /* non-sse */ 171*a58d3d2aSXin Li silk_inner_product_FLP_c, 172*a58d3d2aSXin Li silk_inner_product_FLP_c, 173*a58d3d2aSXin Li silk_inner_product_FLP_c, /* sse4.1 */ 174*a58d3d2aSXin Li MAY_HAVE_AVX2( silk_inner_product_FLP ) /* avx */ 175*a58d3d2aSXin Li }; 176*a58d3d2aSXin Li 177*a58d3d2aSXin Li #endif 178*a58d3d2aSXin Li 179*a58d3d2aSXin Li #endif 180