xref: /aosp_15_r20/external/libopus/celt/x86/x86_celt_map.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2014, Cisco Systems, INC
2*a58d3d2aSXin Li    Written by XiangMingZhu WeiZhou MinPeng YanWang
3*a58d3d2aSXin Li 
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #if defined(HAVE_CONFIG_H)
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include "x86/x86cpu.h"
33*a58d3d2aSXin Li #include "celt_lpc.h"
34*a58d3d2aSXin Li #include "pitch.h"
35*a58d3d2aSXin Li #include "pitch_sse.h"
36*a58d3d2aSXin Li #include "vq.h"
37*a58d3d2aSXin Li 
38*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD)
39*a58d3d2aSXin Li 
40*a58d3d2aSXin Li # if defined(FIXED_POINT)
41*a58d3d2aSXin Li 
42*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
43*a58d3d2aSXin Li 
44*a58d3d2aSXin Li void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
45*a58d3d2aSXin Li          const opus_val16 *x,
46*a58d3d2aSXin Li          const opus_val16 *num,
47*a58d3d2aSXin Li          opus_val16       *y,
48*a58d3d2aSXin Li          int              N,
49*a58d3d2aSXin Li          int              ord,
50*a58d3d2aSXin Li          int              arch
51*a58d3d2aSXin Li ) = {
52*a58d3d2aSXin Li   celt_fir_c,                /* non-sse */
53*a58d3d2aSXin Li   celt_fir_c,
54*a58d3d2aSXin Li   celt_fir_c,
55*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(celt_fir), /* sse4.1  */
56*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(celt_fir)  /* avx  */
57*a58d3d2aSXin Li };
58*a58d3d2aSXin Li 
59*a58d3d2aSXin Li void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
60*a58d3d2aSXin Li          const opus_val16 *x,
61*a58d3d2aSXin Li          const opus_val16 *y,
62*a58d3d2aSXin Li          opus_val32       sum[4],
63*a58d3d2aSXin Li          int              len
64*a58d3d2aSXin Li ) = {
65*a58d3d2aSXin Li   xcorr_kernel_c,                /* non-sse */
66*a58d3d2aSXin Li   xcorr_kernel_c,
67*a58d3d2aSXin Li   xcorr_kernel_c,
68*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1  */
69*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(xcorr_kernel)  /* avx  */
70*a58d3d2aSXin Li };
71*a58d3d2aSXin Li 
72*a58d3d2aSXin Li #endif
73*a58d3d2aSXin Li 
/* Emit the fixed-point celt_inner_prod() dispatch table only when the
   implementation has to be picked at run time, i.e. when either
     - SSE4.1 may be present but is not presumed, or
     - SSE4.1 is not available at all and SSE2 may be present but is not
       presumed.
   The second clause must test OPUS_X86_MAY_HAVE_SSE4_1 (the spelling used by
   every other guard in this file); a misspelled macro name is never defined,
   which would make the !defined() test vacuously true. */
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
 (!defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))

/* Fixed-point dispatch table for celt_inner_prod().
   Slots 0-1 use the portable C routine, slot 2 the name chosen by
   MAY_HAVE_SSE2(), and slots 3-4 the name chosen by MAY_HAVE_SSE4_1()
   (both macros are defined outside this file). */
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
         const opus_val16 *x,
         const opus_val16 *y,
         int              N
) = {
  celt_inner_prod_c,                /* non-sse */
  celt_inner_prod_c,
  MAY_HAVE_SSE2(celt_inner_prod),   /* sse2    */
  MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1  */
  MAY_HAVE_SSE4_1(celt_inner_prod)  /* avx  */
};

#endif
90*a58d3d2aSXin Li 
91*a58d3d2aSXin Li # else
92*a58d3d2aSXin Li 
93*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)
94*a58d3d2aSXin Li 
95*a58d3d2aSXin Li void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
96*a58d3d2aSXin Li          const float *_x,
97*a58d3d2aSXin Li          const float *_y,
98*a58d3d2aSXin Li          float *xcorr,
99*a58d3d2aSXin Li          int len,
100*a58d3d2aSXin Li          int max_pitch,
101*a58d3d2aSXin Li          int arch
102*a58d3d2aSXin Li ) = {
103*a58d3d2aSXin Li   celt_pitch_xcorr_c,                /* non-sse */
104*a58d3d2aSXin Li   celt_pitch_xcorr_c,
105*a58d3d2aSXin Li   celt_pitch_xcorr_c,
106*a58d3d2aSXin Li   celt_pitch_xcorr_c,
107*a58d3d2aSXin Li   MAY_HAVE_AVX2(celt_pitch_xcorr)
108*a58d3d2aSXin Li };
109*a58d3d2aSXin Li 
110*a58d3d2aSXin Li #endif
111*a58d3d2aSXin Li 
112*a58d3d2aSXin Li 
113*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
114*a58d3d2aSXin Li 
115*a58d3d2aSXin Li void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
116*a58d3d2aSXin Li          const opus_val16 *x,
117*a58d3d2aSXin Li          const opus_val16 *y,
118*a58d3d2aSXin Li          opus_val32       sum[4],
119*a58d3d2aSXin Li          int              len
120*a58d3d2aSXin Li ) = {
121*a58d3d2aSXin Li   xcorr_kernel_c,                /* non-sse */
122*a58d3d2aSXin Li   MAY_HAVE_SSE(xcorr_kernel),
123*a58d3d2aSXin Li   MAY_HAVE_SSE(xcorr_kernel),
124*a58d3d2aSXin Li   MAY_HAVE_SSE(xcorr_kernel),
125*a58d3d2aSXin Li   MAY_HAVE_SSE(xcorr_kernel)
126*a58d3d2aSXin Li };
127*a58d3d2aSXin Li 
128*a58d3d2aSXin Li opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
129*a58d3d2aSXin Li          const opus_val16 *x,
130*a58d3d2aSXin Li          const opus_val16 *y,
131*a58d3d2aSXin Li          int              N
132*a58d3d2aSXin Li ) = {
133*a58d3d2aSXin Li   celt_inner_prod_c,                /* non-sse */
134*a58d3d2aSXin Li   MAY_HAVE_SSE(celt_inner_prod),
135*a58d3d2aSXin Li   MAY_HAVE_SSE(celt_inner_prod),
136*a58d3d2aSXin Li   MAY_HAVE_SSE(celt_inner_prod),
137*a58d3d2aSXin Li   MAY_HAVE_SSE(celt_inner_prod)
138*a58d3d2aSXin Li };
139*a58d3d2aSXin Li 
140*a58d3d2aSXin Li void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
141*a58d3d2aSXin Li                     const opus_val16 *x,
142*a58d3d2aSXin Li                     const opus_val16 *y01,
143*a58d3d2aSXin Li                     const opus_val16 *y02,
144*a58d3d2aSXin Li                     int               N,
145*a58d3d2aSXin Li                     opus_val32       *xy1,
146*a58d3d2aSXin Li                     opus_val32       *xy2
147*a58d3d2aSXin Li ) = {
148*a58d3d2aSXin Li   dual_inner_prod_c,                /* non-sse */
149*a58d3d2aSXin Li   MAY_HAVE_SSE(dual_inner_prod),
150*a58d3d2aSXin Li   MAY_HAVE_SSE(dual_inner_prod),
151*a58d3d2aSXin Li   MAY_HAVE_SSE(dual_inner_prod),
152*a58d3d2aSXin Li   MAY_HAVE_SSE(dual_inner_prod)
153*a58d3d2aSXin Li };
154*a58d3d2aSXin Li 
155*a58d3d2aSXin Li void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
156*a58d3d2aSXin Li               opus_val32 *y,
157*a58d3d2aSXin Li               opus_val32 *x,
158*a58d3d2aSXin Li               int         T,
159*a58d3d2aSXin Li               int         N,
160*a58d3d2aSXin Li               opus_val16  g10,
161*a58d3d2aSXin Li               opus_val16  g11,
162*a58d3d2aSXin Li               opus_val16  g12
163*a58d3d2aSXin Li ) = {
164*a58d3d2aSXin Li   comb_filter_const_c,                /* non-sse */
165*a58d3d2aSXin Li   MAY_HAVE_SSE(comb_filter_const),
166*a58d3d2aSXin Li   MAY_HAVE_SSE(comb_filter_const),
167*a58d3d2aSXin Li   MAY_HAVE_SSE(comb_filter_const),
168*a58d3d2aSXin Li   MAY_HAVE_SSE(comb_filter_const)
169*a58d3d2aSXin Li };
170*a58d3d2aSXin Li 
171*a58d3d2aSXin Li 
172*a58d3d2aSXin Li #endif
173*a58d3d2aSXin Li 
174*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
175*a58d3d2aSXin Li opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
176*a58d3d2aSXin Li       celt_norm *_X, int *iy, int K, int N, int arch
177*a58d3d2aSXin Li ) = {
178*a58d3d2aSXin Li   op_pvq_search_c,                /* non-sse */
179*a58d3d2aSXin Li   op_pvq_search_c,
180*a58d3d2aSXin Li   MAY_HAVE_SSE2(op_pvq_search),
181*a58d3d2aSXin Li   MAY_HAVE_SSE2(op_pvq_search),
182*a58d3d2aSXin Li   MAY_HAVE_SSE2(op_pvq_search)
183*a58d3d2aSXin Li };
184*a58d3d2aSXin Li #endif
185*a58d3d2aSXin Li 
186*a58d3d2aSXin Li #endif
187*a58d3d2aSXin Li #endif
188