xref: /aosp_15_r20/external/liblc3/src/mdct_neon.h (revision 49fe348c0058011ee60b6957cdd9d52742df84bc)
/******************************************************************************
 *
 *  Copyright 2022 Google LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at:
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/
18*49fe348cSAndroid Build Coastguard Worker 
19*49fe348cSAndroid Build Coastguard Worker #if __ARM_NEON && __ARM_ARCH_ISA_A64 && \
20*49fe348cSAndroid Build Coastguard Worker         !defined(TEST_ARM) || defined(TEST_NEON)
21*49fe348cSAndroid Build Coastguard Worker 
22*49fe348cSAndroid Build Coastguard Worker #ifndef TEST_NEON
23*49fe348cSAndroid Build Coastguard Worker #include <arm_neon.h>
24*49fe348cSAndroid Build Coastguard Worker #endif /* TEST_NEON */
25*49fe348cSAndroid Build Coastguard Worker 
26*49fe348cSAndroid Build Coastguard Worker 
27*49fe348cSAndroid Build Coastguard Worker /**
28*49fe348cSAndroid Build Coastguard Worker  * FFT 5 Points
29*49fe348cSAndroid Build Coastguard Worker  * The number of interleaved transform `n` assumed to be even
30*49fe348cSAndroid Build Coastguard Worker  */
31*49fe348cSAndroid Build Coastguard Worker #ifndef fft_5
32*49fe348cSAndroid Build Coastguard Worker 
neon_fft_5(const struct lc3_complex * x,struct lc3_complex * y,int n)33*49fe348cSAndroid Build Coastguard Worker LC3_HOT static inline void neon_fft_5(
34*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x, struct lc3_complex *y, int n)
35*49fe348cSAndroid Build Coastguard Worker {
36*49fe348cSAndroid Build Coastguard Worker     static const union { float f[2]; uint64_t u64; }
37*49fe348cSAndroid Build Coastguard Worker         __cos1 = { {  0.3090169944,  0.3090169944 } },
38*49fe348cSAndroid Build Coastguard Worker         __cos2 = { { -0.8090169944, -0.8090169944 } },
39*49fe348cSAndroid Build Coastguard Worker         __sin1 = { {  0.9510565163, -0.9510565163 } },
40*49fe348cSAndroid Build Coastguard Worker         __sin2 = { {  0.5877852523, -0.5877852523 } };
41*49fe348cSAndroid Build Coastguard Worker 
42*49fe348cSAndroid Build Coastguard Worker     float32x2_t sin1 = vcreate_f32(__sin1.u64);
43*49fe348cSAndroid Build Coastguard Worker     float32x2_t sin2 = vcreate_f32(__sin2.u64);
44*49fe348cSAndroid Build Coastguard Worker     float32x2_t cos1 = vcreate_f32(__cos1.u64);
45*49fe348cSAndroid Build Coastguard Worker     float32x2_t cos2 = vcreate_f32(__cos2.u64);
46*49fe348cSAndroid Build Coastguard Worker 
47*49fe348cSAndroid Build Coastguard Worker     float32x4_t sin1q = vcombine_f32(sin1, sin1);
48*49fe348cSAndroid Build Coastguard Worker     float32x4_t sin2q = vcombine_f32(sin2, sin2);
49*49fe348cSAndroid Build Coastguard Worker     float32x4_t cos1q = vcombine_f32(cos1, cos1);
50*49fe348cSAndroid Build Coastguard Worker     float32x4_t cos2q = vcombine_f32(cos2, cos2);
51*49fe348cSAndroid Build Coastguard Worker 
52*49fe348cSAndroid Build Coastguard Worker     for (int i = 0; i < n; i += 2, x += 2, y += 10) {
53*49fe348cSAndroid Build Coastguard Worker 
54*49fe348cSAndroid Build Coastguard Worker         float32x4_t y0, y1, y2, y3, y4;
55*49fe348cSAndroid Build Coastguard Worker 
56*49fe348cSAndroid Build Coastguard Worker         float32x4_t x0 = vld1q_f32( (float *)(x + 0*n) );
57*49fe348cSAndroid Build Coastguard Worker         float32x4_t x1 = vld1q_f32( (float *)(x + 1*n) );
58*49fe348cSAndroid Build Coastguard Worker         float32x4_t x2 = vld1q_f32( (float *)(x + 2*n) );
59*49fe348cSAndroid Build Coastguard Worker         float32x4_t x3 = vld1q_f32( (float *)(x + 3*n) );
60*49fe348cSAndroid Build Coastguard Worker         float32x4_t x4 = vld1q_f32( (float *)(x + 4*n) );
61*49fe348cSAndroid Build Coastguard Worker 
62*49fe348cSAndroid Build Coastguard Worker         float32x4_t s14 = vaddq_f32(x1, x4);
63*49fe348cSAndroid Build Coastguard Worker         float32x4_t s23 = vaddq_f32(x2, x3);
64*49fe348cSAndroid Build Coastguard Worker 
65*49fe348cSAndroid Build Coastguard Worker         float32x4_t d14 = vrev64q_f32( vsubq_f32(x1, x4) );
66*49fe348cSAndroid Build Coastguard Worker         float32x4_t d23 = vrev64q_f32( vsubq_f32(x2, x3) );
67*49fe348cSAndroid Build Coastguard Worker 
68*49fe348cSAndroid Build Coastguard Worker         y0 = vaddq_f32( x0, vaddq_f32(s14, s23) );
69*49fe348cSAndroid Build Coastguard Worker 
70*49fe348cSAndroid Build Coastguard Worker         y4 = vfmaq_f32( x0, s14, cos1q );
71*49fe348cSAndroid Build Coastguard Worker         y4 = vfmaq_f32( y4, s23, cos2q );
72*49fe348cSAndroid Build Coastguard Worker 
73*49fe348cSAndroid Build Coastguard Worker         y1 = vfmaq_f32( y4, d14, sin1q );
74*49fe348cSAndroid Build Coastguard Worker         y1 = vfmaq_f32( y1, d23, sin2q );
75*49fe348cSAndroid Build Coastguard Worker 
76*49fe348cSAndroid Build Coastguard Worker         y4 = vfmsq_f32( y4, d14, sin1q );
77*49fe348cSAndroid Build Coastguard Worker         y4 = vfmsq_f32( y4, d23, sin2q );
78*49fe348cSAndroid Build Coastguard Worker 
79*49fe348cSAndroid Build Coastguard Worker         y3 = vfmaq_f32( x0, s14, cos2q );
80*49fe348cSAndroid Build Coastguard Worker         y3 = vfmaq_f32( y3, s23, cos1q );
81*49fe348cSAndroid Build Coastguard Worker 
82*49fe348cSAndroid Build Coastguard Worker         y2 = vfmaq_f32( y3, d14, sin2q );
83*49fe348cSAndroid Build Coastguard Worker         y2 = vfmsq_f32( y2, d23, sin1q );
84*49fe348cSAndroid Build Coastguard Worker 
85*49fe348cSAndroid Build Coastguard Worker         y3 = vfmsq_f32( y3, d14, sin2q );
86*49fe348cSAndroid Build Coastguard Worker         y3 = vfmaq_f32( y3, d23, sin1q );
87*49fe348cSAndroid Build Coastguard Worker 
88*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 0), vget_low_f32(y0) );
89*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 1), vget_low_f32(y1) );
90*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 2), vget_low_f32(y2) );
91*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 3), vget_low_f32(y3) );
92*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 4), vget_low_f32(y4) );
93*49fe348cSAndroid Build Coastguard Worker 
94*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 5), vget_high_f32(y0) );
95*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 6), vget_high_f32(y1) );
96*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 7), vget_high_f32(y2) );
97*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 8), vget_high_f32(y3) );
98*49fe348cSAndroid Build Coastguard Worker         vst1_f32( (float *)(y + 9), vget_high_f32(y4) );
99*49fe348cSAndroid Build Coastguard Worker     }
100*49fe348cSAndroid Build Coastguard Worker }
101*49fe348cSAndroid Build Coastguard Worker 
102*49fe348cSAndroid Build Coastguard Worker #ifndef TEST_NEON
103*49fe348cSAndroid Build Coastguard Worker #define fft_5 neon_fft_5
104*49fe348cSAndroid Build Coastguard Worker #endif
105*49fe348cSAndroid Build Coastguard Worker 
106*49fe348cSAndroid Build Coastguard Worker #endif /* fft_5 */
107*49fe348cSAndroid Build Coastguard Worker 
108*49fe348cSAndroid Build Coastguard Worker /**
109*49fe348cSAndroid Build Coastguard Worker  * FFT Butterfly 3 Points
110*49fe348cSAndroid Build Coastguard Worker  */
111*49fe348cSAndroid Build Coastguard Worker #ifndef fft_bf3
112*49fe348cSAndroid Build Coastguard Worker 
neon_fft_bf3(const struct lc3_fft_bf3_twiddles * twiddles,const struct lc3_complex * x,struct lc3_complex * y,int n)113*49fe348cSAndroid Build Coastguard Worker LC3_HOT static inline void neon_fft_bf3(
114*49fe348cSAndroid Build Coastguard Worker     const struct lc3_fft_bf3_twiddles *twiddles,
115*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x, struct lc3_complex *y, int n)
116*49fe348cSAndroid Build Coastguard Worker {
117*49fe348cSAndroid Build Coastguard Worker     int n3 = twiddles->n3;
118*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex (*w0_ptr)[2] = twiddles->t;
119*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex (*w1_ptr)[2] = w0_ptr + n3;
120*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex (*w2_ptr)[2] = w1_ptr + n3;
121*49fe348cSAndroid Build Coastguard Worker 
122*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x0_ptr = x;
123*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x1_ptr = x0_ptr + n*n3;
124*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x2_ptr = x1_ptr + n*n3;
125*49fe348cSAndroid Build Coastguard Worker 
126*49fe348cSAndroid Build Coastguard Worker     struct lc3_complex *y0_ptr = y;
127*49fe348cSAndroid Build Coastguard Worker     struct lc3_complex *y1_ptr = y0_ptr + n3;
128*49fe348cSAndroid Build Coastguard Worker     struct lc3_complex *y2_ptr = y1_ptr + n3;
129*49fe348cSAndroid Build Coastguard Worker 
130*49fe348cSAndroid Build Coastguard Worker     for (int j, i = 0; i < n; i++,
131*49fe348cSAndroid Build Coastguard Worker             y0_ptr += 3*n3, y1_ptr += 3*n3, y2_ptr += 3*n3) {
132*49fe348cSAndroid Build Coastguard Worker 
133*49fe348cSAndroid Build Coastguard Worker         /* --- Process by pair --- */
134*49fe348cSAndroid Build Coastguard Worker 
135*49fe348cSAndroid Build Coastguard Worker         for (j = 0; j < (n3 >> 1); j++,
136*49fe348cSAndroid Build Coastguard Worker                 x0_ptr += 2, x1_ptr += 2, x2_ptr += 2) {
137*49fe348cSAndroid Build Coastguard Worker 
138*49fe348cSAndroid Build Coastguard Worker             float32x4_t x0 = vld1q_f32( (float *)x0_ptr );
139*49fe348cSAndroid Build Coastguard Worker             float32x4_t x1 = vld1q_f32( (float *)x1_ptr );
140*49fe348cSAndroid Build Coastguard Worker             float32x4_t x2 = vld1q_f32( (float *)x2_ptr );
141*49fe348cSAndroid Build Coastguard Worker 
142*49fe348cSAndroid Build Coastguard Worker             float32x4_t x1r = vtrn1q_f32( vrev64q_f32(vnegq_f32(x1)), x1 );
143*49fe348cSAndroid Build Coastguard Worker             float32x4_t x2r = vtrn1q_f32( vrev64q_f32(vnegq_f32(x2)), x2 );
144*49fe348cSAndroid Build Coastguard Worker 
145*49fe348cSAndroid Build Coastguard Worker             float32x4x2_t wn;
146*49fe348cSAndroid Build Coastguard Worker             float32x4_t yn;
147*49fe348cSAndroid Build Coastguard Worker 
148*49fe348cSAndroid Build Coastguard Worker             wn = vld2q_f32( (float *)(w0_ptr + 2*j) );
149*49fe348cSAndroid Build Coastguard Worker 
150*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( x0, x1 , vtrn1q_f32(wn.val[0], wn.val[0]) );
151*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x1r, vtrn1q_f32(wn.val[1], wn.val[1]) );
152*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x2 , vtrn2q_f32(wn.val[0], wn.val[0]) );
153*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x2r, vtrn2q_f32(wn.val[1], wn.val[1]) );
154*49fe348cSAndroid Build Coastguard Worker             vst1q_f32( (float *)(y0_ptr + 2*j), yn );
155*49fe348cSAndroid Build Coastguard Worker 
156*49fe348cSAndroid Build Coastguard Worker             wn = vld2q_f32( (float *)(w1_ptr + 2*j) );
157*49fe348cSAndroid Build Coastguard Worker 
158*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( x0, x1 , vtrn1q_f32(wn.val[0], wn.val[0]) );
159*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x1r, vtrn1q_f32(wn.val[1], wn.val[1]) );
160*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x2 , vtrn2q_f32(wn.val[0], wn.val[0]) );
161*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x2r, vtrn2q_f32(wn.val[1], wn.val[1]) );
162*49fe348cSAndroid Build Coastguard Worker             vst1q_f32( (float *)(y1_ptr + 2*j), yn );
163*49fe348cSAndroid Build Coastguard Worker 
164*49fe348cSAndroid Build Coastguard Worker             wn = vld2q_f32( (float *)(w2_ptr + 2*j) );
165*49fe348cSAndroid Build Coastguard Worker 
166*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( x0, x1 , vtrn1q_f32(wn.val[0], wn.val[0]) );
167*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x1r, vtrn1q_f32(wn.val[1], wn.val[1]) );
168*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x2 , vtrn2q_f32(wn.val[0], wn.val[0]) );
169*49fe348cSAndroid Build Coastguard Worker             yn = vfmaq_f32( yn, x2r, vtrn2q_f32(wn.val[1], wn.val[1]) );
170*49fe348cSAndroid Build Coastguard Worker             vst1q_f32( (float *)(y2_ptr + 2*j), yn );
171*49fe348cSAndroid Build Coastguard Worker 
172*49fe348cSAndroid Build Coastguard Worker         }
173*49fe348cSAndroid Build Coastguard Worker 
174*49fe348cSAndroid Build Coastguard Worker         /* --- Last iteration --- */
175*49fe348cSAndroid Build Coastguard Worker 
176*49fe348cSAndroid Build Coastguard Worker         if (n3 & 1) {
177*49fe348cSAndroid Build Coastguard Worker 
178*49fe348cSAndroid Build Coastguard Worker             float32x2x2_t wn;
179*49fe348cSAndroid Build Coastguard Worker             float32x2_t yn;
180*49fe348cSAndroid Build Coastguard Worker 
181*49fe348cSAndroid Build Coastguard Worker             float32x2_t x0 = vld1_f32( (float *)(x0_ptr++) );
182*49fe348cSAndroid Build Coastguard Worker             float32x2_t x1 = vld1_f32( (float *)(x1_ptr++) );
183*49fe348cSAndroid Build Coastguard Worker             float32x2_t x2 = vld1_f32( (float *)(x2_ptr++) );
184*49fe348cSAndroid Build Coastguard Worker 
185*49fe348cSAndroid Build Coastguard Worker             float32x2_t x1r = vtrn1_f32( vrev64_f32(vneg_f32(x1)), x1 );
186*49fe348cSAndroid Build Coastguard Worker             float32x2_t x2r = vtrn1_f32( vrev64_f32(vneg_f32(x2)), x2 );
187*49fe348cSAndroid Build Coastguard Worker 
188*49fe348cSAndroid Build Coastguard Worker             wn = vld2_f32( (float *)(w0_ptr + 2*j) );
189*49fe348cSAndroid Build Coastguard Worker 
190*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( x0, x1 , vtrn1_f32(wn.val[0], wn.val[0]) );
191*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x1r, vtrn1_f32(wn.val[1], wn.val[1]) );
192*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x2 , vtrn2_f32(wn.val[0], wn.val[0]) );
193*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x2r, vtrn2_f32(wn.val[1], wn.val[1]) );
194*49fe348cSAndroid Build Coastguard Worker             vst1_f32( (float *)(y0_ptr + 2*j), yn );
195*49fe348cSAndroid Build Coastguard Worker 
196*49fe348cSAndroid Build Coastguard Worker             wn = vld2_f32( (float *)(w1_ptr + 2*j) );
197*49fe348cSAndroid Build Coastguard Worker 
198*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( x0, x1 , vtrn1_f32(wn.val[0], wn.val[0]) );
199*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x1r, vtrn1_f32(wn.val[1], wn.val[1]) );
200*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x2 , vtrn2_f32(wn.val[0], wn.val[0]) );
201*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x2r, vtrn2_f32(wn.val[1], wn.val[1]) );
202*49fe348cSAndroid Build Coastguard Worker             vst1_f32( (float *)(y1_ptr + 2*j), yn );
203*49fe348cSAndroid Build Coastguard Worker 
204*49fe348cSAndroid Build Coastguard Worker             wn = vld2_f32( (float *)(w2_ptr + 2*j) );
205*49fe348cSAndroid Build Coastguard Worker 
206*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( x0, x1 , vtrn1_f32(wn.val[0], wn.val[0]) );
207*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x1r, vtrn1_f32(wn.val[1], wn.val[1]) );
208*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x2 , vtrn2_f32(wn.val[0], wn.val[0]) );
209*49fe348cSAndroid Build Coastguard Worker             yn = vfma_f32( yn, x2r, vtrn2_f32(wn.val[1], wn.val[1]) );
210*49fe348cSAndroid Build Coastguard Worker             vst1_f32( (float *)(y2_ptr + 2*j), yn );
211*49fe348cSAndroid Build Coastguard Worker         }
212*49fe348cSAndroid Build Coastguard Worker 
213*49fe348cSAndroid Build Coastguard Worker     }
214*49fe348cSAndroid Build Coastguard Worker }
215*49fe348cSAndroid Build Coastguard Worker 
216*49fe348cSAndroid Build Coastguard Worker #ifndef TEST_NEON
217*49fe348cSAndroid Build Coastguard Worker #define fft_bf3 neon_fft_bf3
218*49fe348cSAndroid Build Coastguard Worker #endif
219*49fe348cSAndroid Build Coastguard Worker 
220*49fe348cSAndroid Build Coastguard Worker #endif /* fft_bf3 */
221*49fe348cSAndroid Build Coastguard Worker 
222*49fe348cSAndroid Build Coastguard Worker /**
223*49fe348cSAndroid Build Coastguard Worker  * FFT Butterfly 2 Points
224*49fe348cSAndroid Build Coastguard Worker  */
225*49fe348cSAndroid Build Coastguard Worker #ifndef fft_bf2
226*49fe348cSAndroid Build Coastguard Worker 
neon_fft_bf2(const struct lc3_fft_bf2_twiddles * twiddles,const struct lc3_complex * x,struct lc3_complex * y,int n)227*49fe348cSAndroid Build Coastguard Worker LC3_HOT static inline void neon_fft_bf2(
228*49fe348cSAndroid Build Coastguard Worker     const struct lc3_fft_bf2_twiddles *twiddles,
229*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x, struct lc3_complex *y, int n)
230*49fe348cSAndroid Build Coastguard Worker {
231*49fe348cSAndroid Build Coastguard Worker     int n2 = twiddles->n2;
232*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *w_ptr = twiddles->t;
233*49fe348cSAndroid Build Coastguard Worker 
234*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x0_ptr = x;
235*49fe348cSAndroid Build Coastguard Worker     const struct lc3_complex *x1_ptr = x0_ptr + n*n2;
236*49fe348cSAndroid Build Coastguard Worker 
237*49fe348cSAndroid Build Coastguard Worker     struct lc3_complex *y0_ptr = y;
238*49fe348cSAndroid Build Coastguard Worker     struct lc3_complex *y1_ptr = y0_ptr + n2;
239*49fe348cSAndroid Build Coastguard Worker 
240*49fe348cSAndroid Build Coastguard Worker     for (int j, i = 0; i < n; i++, y0_ptr += 2*n2, y1_ptr += 2*n2) {
241*49fe348cSAndroid Build Coastguard Worker 
242*49fe348cSAndroid Build Coastguard Worker         /* --- Process by pair --- */
243*49fe348cSAndroid Build Coastguard Worker 
244*49fe348cSAndroid Build Coastguard Worker         for (j = 0; j < (n2 >> 1); j++, x0_ptr += 2, x1_ptr += 2) {
245*49fe348cSAndroid Build Coastguard Worker 
246*49fe348cSAndroid Build Coastguard Worker             float32x4_t x0 = vld1q_f32( (float *)x0_ptr );
247*49fe348cSAndroid Build Coastguard Worker             float32x4_t x1 = vld1q_f32( (float *)x1_ptr );
248*49fe348cSAndroid Build Coastguard Worker             float32x4_t y0, y1;
249*49fe348cSAndroid Build Coastguard Worker 
250*49fe348cSAndroid Build Coastguard Worker             float32x4_t x1r = vtrn1q_f32( vrev64q_f32(vnegq_f32(x1)), x1 );
251*49fe348cSAndroid Build Coastguard Worker 
252*49fe348cSAndroid Build Coastguard Worker             float32x4_t w = vld1q_f32( (float *)(w_ptr + 2*j) );
253*49fe348cSAndroid Build Coastguard Worker             float32x4_t w_re = vtrn1q_f32(w, w);
254*49fe348cSAndroid Build Coastguard Worker             float32x4_t w_im = vtrn2q_f32(w, w);
255*49fe348cSAndroid Build Coastguard Worker 
256*49fe348cSAndroid Build Coastguard Worker             y0 = vfmaq_f32( x0, x1 , w_re );
257*49fe348cSAndroid Build Coastguard Worker             y0 = vfmaq_f32( y0, x1r, w_im );
258*49fe348cSAndroid Build Coastguard Worker             vst1q_f32( (float *)(y0_ptr + 2*j), y0 );
259*49fe348cSAndroid Build Coastguard Worker 
260*49fe348cSAndroid Build Coastguard Worker             y1 = vfmsq_f32( x0, x1 , w_re );
261*49fe348cSAndroid Build Coastguard Worker             y1 = vfmsq_f32( y1, x1r, w_im );
262*49fe348cSAndroid Build Coastguard Worker             vst1q_f32( (float *)(y1_ptr + 2*j), y1 );
263*49fe348cSAndroid Build Coastguard Worker         }
264*49fe348cSAndroid Build Coastguard Worker 
265*49fe348cSAndroid Build Coastguard Worker         /* --- Last iteration --- */
266*49fe348cSAndroid Build Coastguard Worker 
267*49fe348cSAndroid Build Coastguard Worker         if (n2 & 1) {
268*49fe348cSAndroid Build Coastguard Worker 
269*49fe348cSAndroid Build Coastguard Worker             float32x2_t x0 = vld1_f32( (float *)(x0_ptr++) );
270*49fe348cSAndroid Build Coastguard Worker             float32x2_t x1 = vld1_f32( (float *)(x1_ptr++) );
271*49fe348cSAndroid Build Coastguard Worker             float32x2_t y0, y1;
272*49fe348cSAndroid Build Coastguard Worker 
273*49fe348cSAndroid Build Coastguard Worker             float32x2_t x1r = vtrn1_f32( vrev64_f32(vneg_f32(x1)), x1 );
274*49fe348cSAndroid Build Coastguard Worker 
275*49fe348cSAndroid Build Coastguard Worker             float32x2_t w = vld1_f32( (float *)(w_ptr + 2*j) );
276*49fe348cSAndroid Build Coastguard Worker             float32x2_t w_re = vtrn1_f32(w, w);
277*49fe348cSAndroid Build Coastguard Worker             float32x2_t w_im = vtrn2_f32(w, w);
278*49fe348cSAndroid Build Coastguard Worker 
279*49fe348cSAndroid Build Coastguard Worker             y0 = vfma_f32( x0, x1 , w_re );
280*49fe348cSAndroid Build Coastguard Worker             y0 = vfma_f32( y0, x1r, w_im );
281*49fe348cSAndroid Build Coastguard Worker             vst1_f32( (float *)(y0_ptr + 2*j), y0 );
282*49fe348cSAndroid Build Coastguard Worker 
283*49fe348cSAndroid Build Coastguard Worker             y1 = vfms_f32( x0, x1 , w_re );
284*49fe348cSAndroid Build Coastguard Worker             y1 = vfms_f32( y1, x1r, w_im );
285*49fe348cSAndroid Build Coastguard Worker             vst1_f32( (float *)(y1_ptr + 2*j), y1 );
286*49fe348cSAndroid Build Coastguard Worker         }
287*49fe348cSAndroid Build Coastguard Worker     }
288*49fe348cSAndroid Build Coastguard Worker }
289*49fe348cSAndroid Build Coastguard Worker 
290*49fe348cSAndroid Build Coastguard Worker #ifndef TEST_NEON
291*49fe348cSAndroid Build Coastguard Worker #define fft_bf2 neon_fft_bf2
292*49fe348cSAndroid Build Coastguard Worker #endif
293*49fe348cSAndroid Build Coastguard Worker 
294*49fe348cSAndroid Build Coastguard Worker #endif /* fft_bf2 */
295*49fe348cSAndroid Build Coastguard Worker 
296*49fe348cSAndroid Build Coastguard Worker #endif /* __ARM_NEON && __ARM_ARCH_ISA_A64 */
297