/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <arm_neon.h>
#include "pitch.h"

#ifdef FIXED_POINT

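/* Fixed-point inner product of two 16-bit vectors: eight samples per
   iteration via widening 16x16 -> 32-bit multiply-accumulates (vmlal_s16),
   then a four-sample block and a scalar tail. */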
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
{
    int i;
    opus_val32 xy;
    int16x8_t x_s16x8, y_s16x8;
    int32x4_t xy_s32x4 = vdupq_n_s32(0);
    int64x2_t xy_s64x2;
    int64x1_t xy_s64x1;

    for (i = 0; i < N - 7; i += 8) {
        x_s16x8  = vld1q_s16(&x[i]);
        y_s16x8  = vld1q_s16(&y[i]);
        xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8));
        xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8));
    }

    if (N - i >= 4) {
        const int16x4_t x_s16x4 = vld1_s16(&x[i]);
        const int16x4_t y_s16x4 = vld1_s16(&y[i]);
        xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4);
        i += 4;
    }

    xy_s64x2 = vpaddlq_s32(xy_s32x4);
    xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2));
    xy       = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0);
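    /* The reduction above widens before it sums: vpaddlq_s32() pairwise-adds
       the four 32-bit lanes into two 64-bit halves, vadd_s64() combines
       those, and only the low 32 bits are kept.  With two's-complement wrap,
       addition is associative modulo 2^32, so this matches the scalar
       MAC16_16() accumulation even if intermediate sums exceed 32 bits. */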

    for (; i < N; i++) {
        xy = MAC16_16(xy, x[i], y[i]);
    }

#ifdef OPUS_CHECK_ASM
    celt_assert(celt_inner_prod_c(x, y, N) == xy);
#endif

    return xy;
}
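
#if 0
/* Illustration only (not compiled): a minimal scalar sketch of the kernel
   above.  The name inner_prod_scalar_sketch is hypothetical; the real scalar
   reference is celt_inner_prod_c().  Assumes the fixed-point typedefs
   opus_val16 == opus_int16 and opus_val32 == opus_int32. */
static opus_val32 inner_prod_scalar_sketch(const opus_val16 *x, const opus_val16 *y, int N)
{
    int i;
    opus_val32 xy = 0;
    for (i = 0; i < N; i++)
        xy += (opus_val32)x[i] * y[i];  /* widening 16x16 -> 32-bit MAC, like vmlal_s16 */
    return xy;
}
#endif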

void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
        int N, opus_val32 *xy1, opus_val32 *xy2)
{
    int i;
    opus_val32 xy01, xy02;
    int16x8_t x_s16x8, y01_s16x8, y02_s16x8;
    int32x4_t xy01_s32x4 = vdupq_n_s32(0);
    int32x4_t xy02_s32x4 = vdupq_n_s32(0);
    int64x2_t xy01_s64x2, xy02_s64x2;
    int64x1_t xy01_s64x1, xy02_s64x1;

    for (i = 0; i < N - 7; i += 8) {
        x_s16x8    = vld1q_s16(&x[i]);
        y01_s16x8  = vld1q_s16(&y01[i]);
        y02_s16x8  = vld1q_s16(&y02[i]);
        xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8));
        xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8));
        xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8));
        xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8));
    }
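    /* Each block of x above is loaded once and shared by both accumulations;
       fusing the two inner products saves half the loads of x compared with
       two separate celt_inner_prod_neon() calls. */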

    if (N - i >= 4) {
        const int16x4_t x_s16x4   = vld1_s16(&x[i]);
        const int16x4_t y01_s16x4 = vld1_s16(&y01[i]);
        const int16x4_t y02_s16x4 = vld1_s16(&y02[i]);
        xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4);
        xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4);
        i += 4;
    }

    xy01_s64x2 = vpaddlq_s32(xy01_s32x4);
    xy02_s64x2 = vpaddlq_s32(xy02_s32x4);
    xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2));
    xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2));
    xy01       = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0);
    xy02       = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0);

    for (; i < N; i++) {
        xy01 = MAC16_16(xy01, x[i], y01[i]);
        xy02 = MAC16_16(xy02, x[i], y02[i]);
    }
    *xy1 = xy01;
    *xy2 = xy02;

#ifdef OPUS_CHECK_ASM
    {
        opus_val32 xy1_c, xy2_c;
        dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c);
        celt_assert(xy1_c == *xy1);
        celt_assert(xy2_c == *xy2);
    }
#endif
}

#else /* !FIXED_POINT */

/* ========================================================================== */

#ifdef __ARM_FEATURE_FMA
/* If we can, force the compiler to use an FMA instruction rather than break
   vmlaq_f32() into fmul/fadd. */
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
#endif

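/* Note: a fused multiply-add rounds the whole a*b+c expression once, whereas
   separate fmul/fadd rounds twice (in scalar terms, fmaf(a, b, c) versus
   a*b + c), so results can differ in the last ulp.  This is one reason the
   OPUS_CHECK_ASM path below compares against an error bound instead of
   requiring bit-exact equality. */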

#ifdef OPUS_CHECK_ASM

/* This part of the code simulates the floating-point NEON operations in
   plain C. */

/* celt_inner_prod_neon_float_c_simulation() mirrors the operation order of
   celt_inner_prod_neon().  The two cannot be expected to match bit-exactly
   (float addition is not associative, and vmlaq_f32() may be fused), so
   they are instead required to agree within the error bound returned in
   *err. */
static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, float *err, int N)
{
   int i;
   opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0;
   *err = 0;
   for (i = 0; i < N - 3; i += 4) {
      xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]);
      xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]);
      xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]);
      xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]);
      *err += ABS32(xy0)+ABS32(xy1)+ABS32(xy2)+ABS32(xy3);
   }
   xy0 += xy2;
   xy1 += xy3;
   xy = xy0 + xy1;
   *err += ABS32(xy1)+ABS32(xy0)+ABS32(xy);
   for (; i < N; i++) {
      xy = MAC16_16(xy, x[i], y[i]);
      *err += ABS32(xy);
   }
   *err = *err*2e-7 + N*1e-37;
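   /* Reading of the constants above (an inference, not documented upstream):
      the 2e-7 factor is roughly 2 ulp of a float (FLT_EPSILON is about
      1.19e-7) applied to the accumulated partial-sum magnitudes, and the
      N*1e-37 term keeps the bound positive when the result is near zero. */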
   return xy;
}

/* dual_inner_prod_neon_float_c_simulation() mirrors the operation order of
   dual_inner_prod_neon(); the two should agree to within the error bounds
   returned in err[0] and err[1]. */
static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
      int N, opus_val32 *xy1, opus_val32 *xy2, float *err)
{
   *xy1 = celt_inner_prod_neon_float_c_simulation(x, y01, &err[0], N);
   *xy2 = celt_inner_prod_neon_float_c_simulation(x, y02, &err[1], N);
}

#endif /* OPUS_CHECK_ASM */

/* ========================================================================== */

opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
{
    int i;
    opus_val32 xy;
    float32x4_t xy_f32x4 = vdupq_n_f32(0);
    float32x2_t xy_f32x2;

    for (i = 0; i < N - 7; i += 8) {
        float32x4_t x_f32x4, y_f32x4;
        x_f32x4  = vld1q_f32(&x[i]);
        y_f32x4  = vld1q_f32(&y[i]);
        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
        x_f32x4  = vld1q_f32(&x[i + 4]);
        y_f32x4  = vld1q_f32(&y[i + 4]);
        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
    }

    if (N - i >= 4) {
        const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
        const float32x4_t y_f32x4 = vld1q_f32(&y[i]);
        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
        i += 4;
    }

    xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4));
    xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2);
    xy       = vget_lane_f32(xy_f32x2, 0);
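    /* The lanes are summed as (lane0+lane2) + (lane1+lane3): vadd_f32() adds
       the low and high halves, then vpadd_f32() adds the two remaining lanes.
       The C simulation above combines its four accumulators in the same
       order, which keeps the two results close and the error-bound
       comparison meaningful. */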

    for (; i < N; i++) {
        xy = MAC16_16(xy, x[i], y[i]);
    }

#ifdef OPUS_CHECK_ASM
    {
        float err, res;
        res = celt_inner_prod_neon_float_c_simulation(x, y, &err, N);
        /*if (ABS32(res - xy) > err) fprintf(stderr, "%g %g %g\n", res, xy, err);*/
        celt_assert(ABS32(res - xy) <= err);
    }
#endif

    return xy;
}

void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
        int N, opus_val32 *xy1, opus_val32 *xy2)
{
    int i;
    opus_val32 xy01, xy02;
    float32x4_t xy01_f32x4 = vdupq_n_f32(0);
    float32x4_t xy02_f32x4 = vdupq_n_f32(0);
    float32x2_t xy01_f32x2, xy02_f32x2;

    for (i = 0; i < N - 7; i += 8) {
        float32x4_t x_f32x4, y01_f32x4, y02_f32x4;
        x_f32x4    = vld1q_f32(&x[i]);
        y01_f32x4  = vld1q_f32(&y01[i]);
        y02_f32x4  = vld1q_f32(&y02[i]);
        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
        x_f32x4    = vld1q_f32(&x[i + 4]);
        y01_f32x4  = vld1q_f32(&y01[i + 4]);
        y02_f32x4  = vld1q_f32(&y02[i + 4]);
        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
    }

    if (N - i >= 4) {
        const float32x4_t x_f32x4   = vld1q_f32(&x[i]);
        const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]);
        const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]);
        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
        i += 4;
    }

    xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4));
    xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4));
    xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2);
    xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2);
    xy01       = vget_lane_f32(xy01_f32x2, 0);
    xy02       = vget_lane_f32(xy02_f32x2, 0);

    for (; i < N; i++) {
        xy01 = MAC16_16(xy01, x[i], y01[i]);
        xy02 = MAC16_16(xy02, x[i], y02[i]);
    }
    *xy1 = xy01;
    *xy2 = xy02;

#ifdef OPUS_CHECK_ASM
    {
        opus_val32 xy1_c, xy2_c;
        float err[2];
        dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c, err);
        /*if (ABS32(xy1_c - *xy1) > err[0]) fprintf(stderr, "dual1 fail: %g %g %g\n", xy1_c, *xy1, err[0]);
        if (ABS32(xy2_c - *xy2) > err[1]) fprintf(stderr, "dual2 fail: %g %g %g\n", xy2_c, *xy2, err[1]);*/
        celt_assert(ABS32(xy1_c - *xy1) <= err[0]);
        celt_assert(ABS32(xy2_c - *xy2) <= err[1]);
    }
#endif
}

#endif /* FIXED_POINT */