1*a58d3d2aSXin Li /* Copyright (c) 2014, Cisco Systems, INC 2*a58d3d2aSXin Li Written by XiangMingZhu WeiZhou MinPeng YanWang 3*a58d3d2aSXin Li 4*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without 5*a58d3d2aSXin Li modification, are permitted provided that the following conditions 6*a58d3d2aSXin Li are met: 7*a58d3d2aSXin Li 8*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright 9*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer. 10*a58d3d2aSXin Li 11*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright 12*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the 13*a58d3d2aSXin Li documentation and/or other materials provided with the distribution. 14*a58d3d2aSXin Li 15*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19*a58d3d2aSXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26*a58d3d2aSXin Li */ 27*a58d3d2aSXin Li 28*a58d3d2aSXin Li #if defined(HAVE_CONFIG_H) 29*a58d3d2aSXin Li #include "config.h" 30*a58d3d2aSXin Li #endif 31*a58d3d2aSXin Li 32*a58d3d2aSXin Li #include "x86/x86cpu.h" 33*a58d3d2aSXin Li #include "celt_lpc.h" 34*a58d3d2aSXin Li #include "pitch.h" 35*a58d3d2aSXin Li #include "pitch_sse.h" 36*a58d3d2aSXin Li #include "vq.h" 37*a58d3d2aSXin Li 38*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD) 39*a58d3d2aSXin Li 40*a58d3d2aSXin Li # if defined(FIXED_POINT) 41*a58d3d2aSXin Li 42*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1) 43*a58d3d2aSXin Li 44*a58d3d2aSXin Li void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( 45*a58d3d2aSXin Li const opus_val16 *x, 46*a58d3d2aSXin Li const opus_val16 *num, 47*a58d3d2aSXin Li opus_val16 *y, 48*a58d3d2aSXin Li int N, 49*a58d3d2aSXin Li int ord, 50*a58d3d2aSXin Li int arch 51*a58d3d2aSXin Li ) = { 52*a58d3d2aSXin Li celt_fir_c, /* non-sse */ 53*a58d3d2aSXin Li celt_fir_c, 54*a58d3d2aSXin Li celt_fir_c, 55*a58d3d2aSXin Li MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */ 56*a58d3d2aSXin Li MAY_HAVE_SSE4_1(celt_fir) /* avx */ 57*a58d3d2aSXin Li }; 58*a58d3d2aSXin Li 59*a58d3d2aSXin Li void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( 60*a58d3d2aSXin Li const opus_val16 *x, 61*a58d3d2aSXin Li const opus_val16 *y, 62*a58d3d2aSXin Li opus_val32 sum[4], 63*a58d3d2aSXin Li int len 64*a58d3d2aSXin Li ) = { 65*a58d3d2aSXin Li xcorr_kernel_c, /* non-sse */ 66*a58d3d2aSXin Li xcorr_kernel_c, 67*a58d3d2aSXin Li xcorr_kernel_c, 68*a58d3d2aSXin Li MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */ 69*a58d3d2aSXin Li MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */ 70*a58d3d2aSXin Li }; 71*a58d3d2aSXin Li 72*a58d3d2aSXin Li #endif 73*a58d3d2aSXin Li 74*a58d3d2aSXin Li #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ 75*a58d3d2aSXin Li (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) 76*a58d3d2aSXin Li 77*a58d3d2aSXin Li opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( 78*a58d3d2aSXin Li const opus_val16 *x, 79*a58d3d2aSXin Li const opus_val16 *y, 80*a58d3d2aSXin Li int N 81*a58d3d2aSXin Li ) = { 82*a58d3d2aSXin Li celt_inner_prod_c, /* non-sse */ 83*a58d3d2aSXin Li celt_inner_prod_c, 84*a58d3d2aSXin Li MAY_HAVE_SSE2(celt_inner_prod), 85*a58d3d2aSXin Li MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */ 86*a58d3d2aSXin Li MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */ 87*a58d3d2aSXin Li }; 88*a58d3d2aSXin Li 89*a58d3d2aSXin Li #endif 90*a58d3d2aSXin Li 91*a58d3d2aSXin Li # else 92*a58d3d2aSXin Li 93*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2) 94*a58d3d2aSXin Li 95*a58d3d2aSXin Li void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])( 96*a58d3d2aSXin Li const float *_x, 97*a58d3d2aSXin Li const float *_y, 98*a58d3d2aSXin Li float *xcorr, 99*a58d3d2aSXin Li int len, 100*a58d3d2aSXin Li int max_pitch, 101*a58d3d2aSXin Li int arch 102*a58d3d2aSXin Li ) = { 103*a58d3d2aSXin Li celt_pitch_xcorr_c, /* non-sse */ 104*a58d3d2aSXin Li celt_pitch_xcorr_c, 105*a58d3d2aSXin Li celt_pitch_xcorr_c, 106*a58d3d2aSXin Li celt_pitch_xcorr_c, 107*a58d3d2aSXin Li MAY_HAVE_AVX2(celt_pitch_xcorr) 108*a58d3d2aSXin Li }; 109*a58d3d2aSXin Li 110*a58d3d2aSXin Li #endif 111*a58d3d2aSXin Li 112*a58d3d2aSXin Li 113*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE) 114*a58d3d2aSXin Li 115*a58d3d2aSXin Li void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( 116*a58d3d2aSXin Li const opus_val16 *x, 117*a58d3d2aSXin Li const opus_val16 *y, 118*a58d3d2aSXin Li opus_val32 sum[4], 119*a58d3d2aSXin Li int len 120*a58d3d2aSXin Li ) = { 121*a58d3d2aSXin Li xcorr_kernel_c, /* non-sse */ 122*a58d3d2aSXin Li MAY_HAVE_SSE(xcorr_kernel), 123*a58d3d2aSXin Li MAY_HAVE_SSE(xcorr_kernel), 124*a58d3d2aSXin Li MAY_HAVE_SSE(xcorr_kernel), 125*a58d3d2aSXin Li MAY_HAVE_SSE(xcorr_kernel) 126*a58d3d2aSXin Li }; 127*a58d3d2aSXin Li 128*a58d3d2aSXin Li opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( 129*a58d3d2aSXin Li const opus_val16 *x, 130*a58d3d2aSXin Li const opus_val16 *y, 131*a58d3d2aSXin Li int N 132*a58d3d2aSXin Li ) = { 133*a58d3d2aSXin Li celt_inner_prod_c, /* non-sse */ 134*a58d3d2aSXin Li MAY_HAVE_SSE(celt_inner_prod), 135*a58d3d2aSXin Li MAY_HAVE_SSE(celt_inner_prod), 136*a58d3d2aSXin Li MAY_HAVE_SSE(celt_inner_prod), 137*a58d3d2aSXin Li MAY_HAVE_SSE(celt_inner_prod) 138*a58d3d2aSXin Li }; 139*a58d3d2aSXin Li 140*a58d3d2aSXin Li void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( 141*a58d3d2aSXin Li const opus_val16 *x, 142*a58d3d2aSXin Li const opus_val16 *y01, 143*a58d3d2aSXin Li const opus_val16 *y02, 144*a58d3d2aSXin Li int N, 145*a58d3d2aSXin Li opus_val32 *xy1, 146*a58d3d2aSXin Li opus_val32 *xy2 147*a58d3d2aSXin Li ) = { 148*a58d3d2aSXin Li dual_inner_prod_c, /* non-sse */ 149*a58d3d2aSXin Li MAY_HAVE_SSE(dual_inner_prod), 150*a58d3d2aSXin Li MAY_HAVE_SSE(dual_inner_prod), 151*a58d3d2aSXin Li MAY_HAVE_SSE(dual_inner_prod), 152*a58d3d2aSXin Li MAY_HAVE_SSE(dual_inner_prod) 153*a58d3d2aSXin Li }; 154*a58d3d2aSXin Li 155*a58d3d2aSXin Li void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( 156*a58d3d2aSXin Li opus_val32 *y, 157*a58d3d2aSXin Li opus_val32 *x, 158*a58d3d2aSXin Li int T, 159*a58d3d2aSXin Li int N, 160*a58d3d2aSXin Li opus_val16 g10, 161*a58d3d2aSXin Li opus_val16 g11, 162*a58d3d2aSXin Li opus_val16 g12 163*a58d3d2aSXin Li ) = { 164*a58d3d2aSXin Li comb_filter_const_c, /* non-sse */ 165*a58d3d2aSXin Li MAY_HAVE_SSE(comb_filter_const), 166*a58d3d2aSXin Li MAY_HAVE_SSE(comb_filter_const), 167*a58d3d2aSXin Li MAY_HAVE_SSE(comb_filter_const), 168*a58d3d2aSXin Li MAY_HAVE_SSE(comb_filter_const) 169*a58d3d2aSXin Li }; 170*a58d3d2aSXin Li 171*a58d3d2aSXin Li 172*a58d3d2aSXin Li #endif 173*a58d3d2aSXin Li 174*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2) 175*a58d3d2aSXin Li opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])( 176*a58d3d2aSXin Li celt_norm *_X, int *iy, int K, int N, int arch 177*a58d3d2aSXin Li ) = { 178*a58d3d2aSXin Li op_pvq_search_c, /* non-sse */ 179*a58d3d2aSXin Li op_pvq_search_c, 180*a58d3d2aSXin Li MAY_HAVE_SSE2(op_pvq_search), 181*a58d3d2aSXin Li MAY_HAVE_SSE2(op_pvq_search), 182*a58d3d2aSXin Li MAY_HAVE_SSE2(op_pvq_search) 183*a58d3d2aSXin Li }; 184*a58d3d2aSXin Li #endif 185*a58d3d2aSXin Li 186*a58d3d2aSXin Li #endif 187*a58d3d2aSXin Li #endif 188