xref: /aosp_15_r20/external/libopus/celt/mips/pitch_mipsr1.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2007-2008 CSIRO
2*a58d3d2aSXin Li    Copyright (c) 2007-2009 Xiph.Org Foundation
3*a58d3d2aSXin Li    Written by Jean-Marc Valin */
4*a58d3d2aSXin Li /**
   @file pitch_mipsr1.h
   @brief MIPS overrides for the pitch analysis kernels
7*a58d3d2aSXin Li  */
8*a58d3d2aSXin Li 
9*a58d3d2aSXin Li /*
10*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
11*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
12*a58d3d2aSXin Li    are met:
13*a58d3d2aSXin Li 
14*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
15*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
16*a58d3d2aSXin Li 
17*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
18*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
19*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
20*a58d3d2aSXin Li 
21*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*a58d3d2aSXin Li */
33*a58d3d2aSXin Li 
34*a58d3d2aSXin Li #ifndef PITCH_MIPSR1_H
35*a58d3d2aSXin Li #define PITCH_MIPSR1_H
36*a58d3d2aSXin Li 
37*a58d3d2aSXin Li #define OVERRIDE_DUAL_INNER_PROD
dual_inner_prod(const opus_val16 * x,const opus_val16 * y01,const opus_val16 * y02,int N,opus_val32 * xy1,opus_val32 * xy2,int arch)38*a58d3d2aSXin Li static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
39*a58d3d2aSXin Li       int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
40*a58d3d2aSXin Li {
41*a58d3d2aSXin Li    int j;
42*a58d3d2aSXin Li    opus_val32 xy01=0;
43*a58d3d2aSXin Li    opus_val32 xy02=0;
44*a58d3d2aSXin Li 
45*a58d3d2aSXin Li    (void)arch;
46*a58d3d2aSXin Li 
47*a58d3d2aSXin Li    asm volatile("MULT $ac1, $0, $0");
48*a58d3d2aSXin Li    asm volatile("MULT $ac2, $0, $0");
49*a58d3d2aSXin Li    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
50*a58d3d2aSXin Li    for (j=0;j<N;j++)
51*a58d3d2aSXin Li    {
52*a58d3d2aSXin Li       asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
53*a58d3d2aSXin Li       asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
54*a58d3d2aSXin Li       ++j;
55*a58d3d2aSXin Li       asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
56*a58d3d2aSXin Li       asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
57*a58d3d2aSXin Li    }
58*a58d3d2aSXin Li    asm volatile ("mflo %0, $ac1": "=r"(xy01));
59*a58d3d2aSXin Li    asm volatile ("mflo %0, $ac2": "=r"(xy02));
60*a58d3d2aSXin Li    *xy1 = xy01;
61*a58d3d2aSXin Li    *xy2 = xy02;
62*a58d3d2aSXin Li }
63*a58d3d2aSXin Li 
/** Four-lag cross-correlation kernel built on __builtin_mips_madd.
 *
 * For each lag k in 0..3, adds x[j]*y[j+k] over j in [0, len) to sum[k].
 * The running totals are kept in 64-bit accumulators and converted back to
 * opus_val32 when stored.
 *
 * @param x    First input; len samples are read.
 * @param y    Second input; y[0..2] are read unconditionally, then one more
 *             sample for every x sample consumed.
 * @param sum  In/out array of four partial sums, one per lag.
 * @param len  Number of x samples to process.
 */
static inline void xcorr_kernel_mips(const opus_val16 * x,
      const opus_val16 * y, opus_val32 sum[4], int len)
{
   opus_int64 acc0 = (opus_int64)sum[0];
   opus_int64 acc1 = (opus_int64)sum[1];
   opus_int64 acc2 = (opus_int64)sum[2];
   opus_int64 acc3 = (opus_int64)sum[3];
   /* Rotating window of four consecutive y samples. Each step overwrites
      the oldest slot instead of shifting the other three. */
   opus_val16 ya = y[0];
   opus_val16 yb = y[1];
   opus_val16 yc = y[2];
   opus_val16 yd = 0; /* always loaded before it is read; set to keep gcc quiet */
   opus_val16 xv;
   int done = 0;
   int left;

   y += 3;

   /* Main body: four x samples per pass, one window rotation per sample. */
   while (done < len-3)
   {
      xv = x[0];
      yd = y[0];
      acc0 = __builtin_mips_madd(acc0, xv, ya);
      acc1 = __builtin_mips_madd(acc1, xv, yb);
      acc2 = __builtin_mips_madd(acc2, xv, yc);
      acc3 = __builtin_mips_madd(acc3, xv, yd);

      xv = x[1];
      ya = y[1];
      acc0 = __builtin_mips_madd(acc0, xv, yb);
      acc1 = __builtin_mips_madd(acc1, xv, yc);
      acc2 = __builtin_mips_madd(acc2, xv, yd);
      acc3 = __builtin_mips_madd(acc3, xv, ya);

      xv = x[2];
      yb = y[2];
      acc0 = __builtin_mips_madd(acc0, xv, yc);
      acc1 = __builtin_mips_madd(acc1, xv, yd);
      acc2 = __builtin_mips_madd(acc2, xv, ya);
      acc3 = __builtin_mips_madd(acc3, xv, yb);

      xv = x[3];
      yc = y[3];
      acc0 = __builtin_mips_madd(acc0, xv, yd);
      acc1 = __builtin_mips_madd(acc1, xv, ya);
      acc2 = __builtin_mips_madd(acc2, xv, yb);
      acc3 = __builtin_mips_madd(acc3, xv, yc);

      x += 4;
      y += 4;
      done += 4;
   }

   /* Tail: up to three samples remain, handled with the first three
      rotation stages of the main body. */
   left = len - done;
   if (left > 0)
   {
      xv = x[0];
      yd = y[0];
      acc0 = __builtin_mips_madd(acc0, xv, ya);
      acc1 = __builtin_mips_madd(acc1, xv, yb);
      acc2 = __builtin_mips_madd(acc2, xv, yc);
      acc3 = __builtin_mips_madd(acc3, xv, yd);
   }
   if (left > 1)
   {
      xv = x[1];
      ya = y[1];
      acc0 = __builtin_mips_madd(acc0, xv, yb);
      acc1 = __builtin_mips_madd(acc1, xv, yc);
      acc2 = __builtin_mips_madd(acc2, xv, yd);
      acc3 = __builtin_mips_madd(acc3, xv, ya);
   }
   if (left > 2)
   {
      xv = x[2];
      yb = y[2];
      acc0 = __builtin_mips_madd(acc0, xv, yc);
      acc1 = __builtin_mips_madd(acc1, xv, yd);
      acc2 = __builtin_mips_madd(acc2, xv, ya);
      acc3 = __builtin_mips_madd(acc3, xv, yb);
   }

   sum[0] = (opus_val32)acc0;
   sum[1] = (opus_val32)acc1;
   sum[2] = (opus_val32)acc2;
   sum[3] = (opus_val32)acc3;
}

/* Route xcorr_kernel() to the MIPS kernel; arch is evaluated and discarded. */
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
    ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
160*a58d3d2aSXin Li 
161*a58d3d2aSXin Li #endif /* PITCH_MIPSR1_H */
162