/******************************************************************************
 *
 *  Copyright 2022 Google LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at:
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

#if __ARM_NEON && __ARM_ARCH_ISA_A64

#ifndef TEST_NEON
#include <arm_neon.h>
#endif /* TEST_NEON */


/**
 * FFT 5 Points
 * The number of interleaved transforms `n` is assumed to be even
 */
#ifndef fft_5
#define fft_5 neon_fft_5
LC3_HOT static inline void neon_fft_5(
    const struct lc3_complex *x, struct lc3_complex *y, int n)
{
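    /* Twiddle factors of the 5-point DFT: cos/sin of 2*pi/5 and 4*pi/5.
     * The sine constants carry alternating signs so that multiplying the
     * (im,re)-swapped differences below yields the -/+ j*sin(...) terms. */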
    static const union { float f[2]; uint64_t u64; }
        __cos1 = { {  0.3090169944,  0.3090169944 } },
        __cos2 = { { -0.8090169944, -0.8090169944 } },
        __sin1 = { {  0.9510565163, -0.9510565163 } },
        __sin2 = { {  0.5877852523, -0.5877852523 } };

    float32x2_t sin1 = vcreate_f32(__sin1.u64);
    float32x2_t sin2 = vcreate_f32(__sin2.u64);
    float32x2_t cos1 = vcreate_f32(__cos1.u64);
    float32x2_t cos2 = vcreate_f32(__cos2.u64);

    float32x4_t sin1q = vcombine_f32(sin1, sin1);
    float32x4_t sin2q = vcombine_f32(sin2, sin2);
    float32x4_t cos1q = vcombine_f32(cos1, cos1);
    float32x4_t cos2q = vcombine_f32(cos2, cos2);

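    /* Each iteration handles two of the `n` interleaved transforms: a
     * 128-bit register holds one complex point of each (re,im | re,im),
     * and the 2 x 5 output points are stored as two groups of 5. */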
    for (int i = 0; i < n; i += 2, x += 2, y += 10) {

        float32x4_t y0, y1, y2, y3, y4;

        float32x4_t x0 = vld1q_f32( (float *)(x + 0*n) );
        float32x4_t x1 = vld1q_f32( (float *)(x + 1*n) );
        float32x4_t x2 = vld1q_f32( (float *)(x + 2*n) );
        float32x4_t x3 = vld1q_f32( (float *)(x + 3*n) );
        float32x4_t x4 = vld1q_f32( (float *)(x + 4*n) );

        float32x4_t s14 = vaddq_f32(x1, x4);
        float32x4_t s23 = vaddq_f32(x2, x3);

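        /* vrev64q swaps the real and imaginary part of each difference;
         * together with the {+sin, -sin} constants, the FMAs below add the
         * -j*sin / +j*sin rotation terms of the 5-point DFT. */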
        float32x4_t d14 = vrev64q_f32( vsubq_f32(x1, x4) );
        float32x4_t d23 = vrev64q_f32( vsubq_f32(x2, x3) );

        y0 = vaddq_f32( x0, vaddq_f32(s14, s23) );

        y4 = vfmaq_f32( x0, s14, cos1q );
        y4 = vfmaq_f32( y4, s23, cos2q );

        y1 = vfmaq_f32( y4, d14, sin1q );
        y1 = vfmaq_f32( y1, d23, sin2q );

        y4 = vfmsq_f32( y4, d14, sin1q );
        y4 = vfmsq_f32( y4, d23, sin2q );

        y3 = vfmaq_f32( x0, s14, cos2q );
        y3 = vfmaq_f32( y3, s23, cos1q );

        y2 = vfmaq_f32( y3, d14, sin2q );
        y2 = vfmsq_f32( y2, d23, sin1q );

        y3 = vfmsq_f32( y3, d14, sin2q );
        y3 = vfmaq_f32( y3, d23, sin1q );

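        /* De-interleave: the low halves hold the 5 outputs of the first
         * transform (y[0..4]), the high halves those of the second (y[5..9]). */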
        vst1_f32( (float *)(y + 0), vget_low_f32(y0) );
        vst1_f32( (float *)(y + 1), vget_low_f32(y1) );
        vst1_f32( (float *)(y + 2), vget_low_f32(y2) );
        vst1_f32( (float *)(y + 3), vget_low_f32(y3) );
        vst1_f32( (float *)(y + 4), vget_low_f32(y4) );

        vst1_f32( (float *)(y + 5), vget_high_f32(y0) );
        vst1_f32( (float *)(y + 6), vget_high_f32(y1) );
        vst1_f32( (float *)(y + 7), vget_high_f32(y2) );
        vst1_f32( (float *)(y + 8), vget_high_f32(y3) );
        vst1_f32( (float *)(y + 9), vget_high_f32(y4) );
    }
}
#endif /* fft_5 */

/**
 * FFT Butterfly 3 Points
 */
#ifndef fft_bf3
#define fft_bf3 neon_fft_bf3
LC3_HOT static inline void neon_fft_bf3(
    const struct lc3_fft_bf3_twiddles *twiddles,
    const struct lc3_complex *x, struct lc3_complex *y, int n)
{
    int n3 = twiddles->n3;
    const struct lc3_complex (*w0_ptr)[2] = twiddles->t;
    const struct lc3_complex (*w1_ptr)[2] = w0_ptr + n3;
    const struct lc3_complex (*w2_ptr)[2] = w1_ptr + n3;

    const struct lc3_complex *x0_ptr = x;
    const struct lc3_complex *x1_ptr = x0_ptr + n*n3;
    const struct lc3_complex *x2_ptr = x1_ptr + n*n3;

    struct lc3_complex *y0_ptr = y;
    struct lc3_complex *y1_ptr = y0_ptr + n3;
    struct lc3_complex *y2_ptr = y1_ptr + n3;

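    /* The three butterfly inputs x0/x1/x2 sit n*n3 complex values apart;
     * the three outputs of each butterfly land n3 values apart within the
     * current transform. */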
    for (int j, i = 0; i < n; i++,
            y0_ptr += 3*n3, y1_ptr += 3*n3, y2_ptr += 3*n3) {

        /* --- Process by pair --- */

        for (j = 0; j < (n3 >> 1); j++,
                x0_ptr += 2, x1_ptr += 2, x2_ptr += 2) {

            float32x4_t x0 = vld1q_f32( (float *)x0_ptr );
            float32x4_t x1 = vld1q_f32( (float *)x1_ptr );
            float32x4_t x2 = vld1q_f32( (float *)x2_ptr );

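            /* x1r and x2r hold (-im, re) for each complex value, i.e. j*x1
             * and j*x2, so the FMAs below can form the full complex products
             * w*x1 and w*x2 from separate real and imaginary twiddle lanes. */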
            float32x4_t x1r = vtrn1q_f32( vrev64q_f32(vnegq_f32(x1)), x1 );
            float32x4_t x2r = vtrn1q_f32( vrev64q_f32(vnegq_f32(x2)), x2 );

            float32x4x2_t wn;
            float32x4_t yn;

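            /* vld2q de-interleaves the twiddle pairs into real (val[0]) and
             * imaginary (val[1]) parts; each block below then computes
             * yn = x0 + w[0]*x1 + w[1]*x2 for the two values processed. */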
            wn = vld2q_f32( (float *)(w0_ptr + 2*j) );

            yn = vfmaq_f32( x0, x1 , vtrn1q_f32(wn.val[0], wn.val[0]) );
            yn = vfmaq_f32( yn, x1r, vtrn1q_f32(wn.val[1], wn.val[1]) );
            yn = vfmaq_f32( yn, x2 , vtrn2q_f32(wn.val[0], wn.val[0]) );
            yn = vfmaq_f32( yn, x2r, vtrn2q_f32(wn.val[1], wn.val[1]) );
            vst1q_f32( (float *)(y0_ptr + 2*j), yn );

            wn = vld2q_f32( (float *)(w1_ptr + 2*j) );

            yn = vfmaq_f32( x0, x1 , vtrn1q_f32(wn.val[0], wn.val[0]) );
            yn = vfmaq_f32( yn, x1r, vtrn1q_f32(wn.val[1], wn.val[1]) );
            yn = vfmaq_f32( yn, x2 , vtrn2q_f32(wn.val[0], wn.val[0]) );
            yn = vfmaq_f32( yn, x2r, vtrn2q_f32(wn.val[1], wn.val[1]) );
            vst1q_f32( (float *)(y1_ptr + 2*j), yn );

            wn = vld2q_f32( (float *)(w2_ptr + 2*j) );

            yn = vfmaq_f32( x0, x1 , vtrn1q_f32(wn.val[0], wn.val[0]) );
            yn = vfmaq_f32( yn, x1r, vtrn1q_f32(wn.val[1], wn.val[1]) );
            yn = vfmaq_f32( yn, x2 , vtrn2q_f32(wn.val[0], wn.val[0]) );
            yn = vfmaq_f32( yn, x2r, vtrn2q_f32(wn.val[1], wn.val[1]) );
            vst1q_f32( (float *)(y2_ptr + 2*j), yn );

        }

        /* --- Last iteration --- */

        if (n3 & 1) {

            float32x2x2_t wn;
            float32x2_t yn;

            float32x2_t x0 = vld1_f32( (float *)(x0_ptr++) );
            float32x2_t x1 = vld1_f32( (float *)(x1_ptr++) );
            float32x2_t x2 = vld1_f32( (float *)(x2_ptr++) );

            float32x2_t x1r = vtrn1_f32( vrev64_f32(vneg_f32(x1)), x1 );
            float32x2_t x2r = vtrn1_f32( vrev64_f32(vneg_f32(x2)), x2 );

            wn = vld2_f32( (float *)(w0_ptr + 2*j) );

            yn = vfma_f32( x0, x1 , vtrn1_f32(wn.val[0], wn.val[0]) );
            yn = vfma_f32( yn, x1r, vtrn1_f32(wn.val[1], wn.val[1]) );
            yn = vfma_f32( yn, x2 , vtrn2_f32(wn.val[0], wn.val[0]) );
            yn = vfma_f32( yn, x2r, vtrn2_f32(wn.val[1], wn.val[1]) );
            vst1_f32( (float *)(y0_ptr + 2*j), yn );

            wn = vld2_f32( (float *)(w1_ptr + 2*j) );

            yn = vfma_f32( x0, x1 , vtrn1_f32(wn.val[0], wn.val[0]) );
            yn = vfma_f32( yn, x1r, vtrn1_f32(wn.val[1], wn.val[1]) );
            yn = vfma_f32( yn, x2 , vtrn2_f32(wn.val[0], wn.val[0]) );
            yn = vfma_f32( yn, x2r, vtrn2_f32(wn.val[1], wn.val[1]) );
            vst1_f32( (float *)(y1_ptr + 2*j), yn );

            wn = vld2_f32( (float *)(w2_ptr + 2*j) );

            yn = vfma_f32( x0, x1 , vtrn1_f32(wn.val[0], wn.val[0]) );
            yn = vfma_f32( yn, x1r, vtrn1_f32(wn.val[1], wn.val[1]) );
            yn = vfma_f32( yn, x2 , vtrn2_f32(wn.val[0], wn.val[0]) );
            yn = vfma_f32( yn, x2r, vtrn2_f32(wn.val[1], wn.val[1]) );
            vst1_f32( (float *)(y2_ptr + 2*j), yn );
        }

    }
}
#endif /* fft_bf3 */

/**
 * FFT Butterfly 2 Points
 */
#ifndef fft_bf2
#define fft_bf2 neon_fft_bf2
LC3_HOT static inline void neon_fft_bf2(
    const struct lc3_fft_bf2_twiddles *twiddles,
    const struct lc3_complex *x, struct lc3_complex *y, int n)
{
    int n2 = twiddles->n2;
    const struct lc3_complex *w_ptr = twiddles->t;

    const struct lc3_complex *x0_ptr = x;
    const struct lc3_complex *x1_ptr = x0_ptr + n*n2;

    struct lc3_complex *y0_ptr = y;
    struct lc3_complex *y1_ptr = y0_ptr + n2;

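    /* The two butterfly inputs x0/x1 sit n*n2 complex values apart; the
     * two outputs land n2 values apart within the current transform. */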
    for (int j, i = 0; i < n; i++, y0_ptr += 2*n2, y1_ptr += 2*n2) {

        /* --- Process by pair --- */

        for (j = 0; j < (n2 >> 1); j++, x0_ptr += 2, x1_ptr += 2) {

            float32x4_t x0 = vld1q_f32( (float *)x0_ptr );
            float32x4_t x1 = vld1q_f32( (float *)x1_ptr );
            float32x4_t y0, y1;

            float32x4_t x1r = vtrn1q_f32( vrev64q_f32(vnegq_f32(x1)), x1 );

            float32x4_t w = vld1q_f32( (float *)(w_ptr + 2*j) );
            float32x4_t w_re = vtrn1q_f32(w, w);
            float32x4_t w_im = vtrn2q_f32(w, w);

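            /* Radix-2 butterfly: y0 = x0 + w*x1 and y1 = x0 - w*x1, with
             * x1r = j*x1 providing the imaginary half of the product. */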
            y0 = vfmaq_f32( x0, x1 , w_re );
            y0 = vfmaq_f32( y0, x1r, w_im );
            vst1q_f32( (float *)(y0_ptr + 2*j), y0 );

            y1 = vfmsq_f32( x0, x1 , w_re );
            y1 = vfmsq_f32( y1, x1r, w_im );
            vst1q_f32( (float *)(y1_ptr + 2*j), y1 );
        }

        /* --- Last iteration --- */

        if (n2 & 1) {

            float32x2_t x0 = vld1_f32( (float *)(x0_ptr++) );
            float32x2_t x1 = vld1_f32( (float *)(x1_ptr++) );
            float32x2_t y0, y1;

            float32x2_t x1r = vtrn1_f32( vrev64_f32(vneg_f32(x1)), x1 );

            float32x2_t w = vld1_f32( (float *)(w_ptr + 2*j) );
            float32x2_t w_re = vtrn1_f32(w, w);
            float32x2_t w_im = vtrn2_f32(w, w);

            y0 = vfma_f32( x0, x1 , w_re );
            y0 = vfma_f32( y0, x1r, w_im );
            vst1_f32( (float *)(y0_ptr + 2*j), y0 );

            y1 = vfms_f32( x0, x1 , w_re );
            y1 = vfms_f32( y1, x1r, w_im );
            vst1_f32( (float *)(y1_ptr + 2*j), y1 );
        }
    }
}
#endif /* fft_bf2 */

#endif /* __ARM_NEON && __ARM_ARCH_ISA_A64 */