/* xref: /aosp_15_r20/external/libxaac/encoder/ixheaace_mdct_480.c (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451) */
/******************************************************************************
 *                                                                            *
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
20 
21 #include <string.h>
22 #include "ixheaac_type_def.h"
23 #include "ixheaace_psy_const.h"
24 #include "ixheaace_tns.h"
25 #include "ixheaace_tns_params.h"
26 #include "ixheaace_rom.h"
27 #include "ixheaace_common_rom.h"
28 #include "ixheaac_constants.h"
29 #include "ixheaace_aac_constants.h"
30 
31 #include "ixheaace_fft.h"
32 
/**
 * Gathers N two-word pairs from ip into op, in the order given by re_arr_tab.
 *
 * For every n in [0, N) the two consecutive words ip[2 * re_arr_tab[n]] and
 * ip[2 * re_arr_tab[n] + 1] are copied to op[2 * n] and op[2 * n + 1].
 *
 * @param ip          Source buffer; read at 2 * re_arr_tab[n] and the word after it.
 * @param op          Destination buffer; 2 * N words are written starting at op[0].
 * @param N           Number of pairs to copy; nothing is read or written when N <= 0.
 * @param re_arr_tab  Table of N source pair indices (each entry is doubled to
 *                    obtain the word offset into ip).
 */
void ia_aac_ld_enc_rearrange(WORD32 *ip, WORD32 *op, WORD32 N, UWORD8 *re_arr_tab) {
  WORD32 n;

  for (n = 0; n < N; n++) {
    /* Pair index -> word offset of the first element of the pair. */
    const WORD32 idx = re_arr_tab[n] << 1;

    op[2 * n] = ip[idx];
    op[2 * n + 1] = ip[idx + 1];
  }
}
45 
ia_enhaacplus_enc_fft15(FLOAT32 * ptr_vec)46 static VOID ia_enhaacplus_enc_fft15(FLOAT32 *ptr_vec) {
47   FLOAT32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, i0, i1,
48       i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, tmp0, tmp1, tmp2,
49       tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16,
50       tmp17, tmp18, tmp19, tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp28, tmp29;
51 
52   /* Pre-additions real part */
53   r1 = ptr_vec[2] + ptr_vec[8];
54   r2 = ptr_vec[2] - ptr_vec[8];
55   r3 = ptr_vec[4] + ptr_vec[16];
56   r4 = ptr_vec[4] - ptr_vec[16];
57   r5 = ptr_vec[6] + ptr_vec[24];
58   r6 = ptr_vec[6] - ptr_vec[24];
59   r7 = ptr_vec[10] + ptr_vec[20];
60   r8 = ptr_vec[10] - ptr_vec[20];
61   r9 = ptr_vec[12] + ptr_vec[18];
62   r10 = ptr_vec[12] - ptr_vec[18];
63   r11 = ptr_vec[14] + ptr_vec[26];
64   r12 = ptr_vec[14] - ptr_vec[26];
65   r13 = ptr_vec[22] + ptr_vec[28];
66   r14 = ptr_vec[22] - ptr_vec[28];
67 
68   tmp2 = r1 + r3;
69   tmp4 = r1 - r3;
70   tmp6 = r2 + r14;
71   tmp8 = r2 - r14;
72   tmp10 = r4 + r12;
73   tmp12 = r4 - r12;
74   tmp14 = r5 + r9;
75   tmp16 = r5 - r9;
76   tmp18 = r11 + r13;
77   tmp20 = r11 - r13;
78 
79   /* Pre-additions imaginary part */
80   i1 = ptr_vec[3] + ptr_vec[9];
81   i2 = ptr_vec[3] - ptr_vec[9];
82   i3 = ptr_vec[5] + ptr_vec[17];
83   i4 = ptr_vec[5] - ptr_vec[17];
84   i5 = ptr_vec[7] + ptr_vec[25];
85   i6 = ptr_vec[7] - ptr_vec[25];
86   i7 = ptr_vec[11] + ptr_vec[21];
87   i8 = ptr_vec[11] - ptr_vec[21];
88   i9 = ptr_vec[13] + ptr_vec[19];
89   i10 = ptr_vec[13] - ptr_vec[19];
90   i11 = ptr_vec[15] + ptr_vec[27];
91   i12 = ptr_vec[15] - ptr_vec[27];
92   i13 = ptr_vec[23] + ptr_vec[29];
93   i14 = ptr_vec[23] - ptr_vec[29];
94 
95   tmp3 = i1 + i3;
96   tmp5 = i1 - i3;
97   tmp7 = i2 + i14;
98   tmp9 = i2 - i14;
99   tmp11 = i4 + i12;
100   tmp13 = i4 - i12;
101   tmp15 = i5 + i9;
102   tmp17 = i5 - i9;
103   tmp19 = i11 + i13;
104   tmp21 = i11 - i13;
105 
106   /* Pre-additions and core multiplications */
107   tmp28 = tmp4 + tmp20;
108   tmp29 = tmp5 + tmp21;
109   r4 = tmp2 + tmp18;
110   i4 = tmp3 + tmp19;
111   r3 = (FLOAT32)((r4 + tmp14) * -1.25);
112   i3 = (FLOAT32)((i4 + tmp15) * -1.25);
113   r2 = (FLOAT32)((tmp29 - i8) * -8.660254037844387e-1);
114   i2 = (FLOAT32)((tmp28 - r8) * 8.660254037844387e-1);
115   r1 = r4 + r7;
116   i1 = i4 + i7;
117   r0 = r1 + ptr_vec[0] + tmp14;
118   i0 = i1 + ptr_vec[1] + tmp15;
119   r7 = tmp4 - tmp20;
120   i7 = tmp5 - tmp21;
121   r8 = (FLOAT32)((tmp3 - tmp19) * -4.841229182759272e-1);
122   i8 = (FLOAT32)((tmp2 - tmp18) * 4.841229182759272e-1);
123   tmp0 = tmp6 + r10;
124   tmp1 = tmp7 + i10;
125   tmp2 = r6 - tmp10;
126   tmp3 = i6 - tmp11;
127   r10 = (FLOAT32)(tmp7 * -2.308262652881440);
128   i10 = (FLOAT32)(tmp6 * 2.308262652881440);
129   r11 = (FLOAT32)(tmp8 * 1.332676064001459);
130   i11 = (FLOAT32)(tmp9 * 1.332676064001459);
131   r6 = (FLOAT32)((r7 - tmp16) * 5.590169943749475e-1);
132   i6 = (FLOAT32)((i7 - tmp17) * 5.590169943749475e-1);
133   r12 = (FLOAT32)((tmp1 + tmp3) * 5.877852522924733e-1);
134   i12 = (FLOAT32)((tmp0 + tmp2) * -5.877852522924733e-1);
135   r13 = (FLOAT32)((tmp7 - tmp11) * -8.816778784387098e-1);
136   i13 = (FLOAT32)((tmp6 - tmp10) * 8.816778784387098e-1);
137   r14 = (FLOAT32)((tmp8 + tmp12) * 5.090369604551274e-1);
138   i14 = (FLOAT32)((tmp9 + tmp13) * 5.090369604551274e-1);
139   r16 = (FLOAT32)(tmp11 * 5.449068960040204e-1);
140   i16 = (FLOAT32)(tmp10 * -5.449068960040204e-1);
141   r17 = (FLOAT32)(tmp12 * 3.146021430912046e-1);
142   i17 = (FLOAT32)(tmp13 * 3.146021430912046e-1);
143 
144   r4 *= 1.875;
145   i4 *= 1.875;
146   r1 *= -1.5;
147   i1 *= -1.5;
148   r7 *= (FLOAT32)(-8.385254915624212e-1);
149   i7 *= (FLOAT32)(-8.385254915624212e-1);
150   r5 = (FLOAT32)(tmp29 * 1.082531754730548);
151   i5 = (FLOAT32)(tmp28 * -1.082531754730548);
152   r9 = (FLOAT32)(tmp1 * 1.5388417685876270);
153   i9 = (FLOAT32)(tmp0 * -1.538841768587627);
154   r15 = (FLOAT32)(tmp3 * 3.632712640026803e-1);
155   i15 = (FLOAT32)(tmp2 * -3.632712640026803e-1);
156 
157   /* Post-additions real part */
158   tmp2 = r0 + r1;
159   tmp4 = r3 + r6;
160   tmp6 = r3 - r6;
161   tmp8 = r4 + r5;
162   tmp10 = r4 - r5;
163   tmp12 = r7 + r8;
164   tmp14 = r7 - r8;
165   tmp16 = r13 + r16;
166   tmp18 = r14 + r17;
167   tmp20 = r10 - r13;
168   tmp22 = r11 - r14;
169   tmp24 = r12 + r15;
170   tmp26 = r12 - r9;
171 
172   r1 = tmp2 + r2;
173   r2 = tmp2 - r2;
174   r3 = tmp4 + tmp26;
175   r4 = tmp4 - tmp26;
176   r5 = tmp6 + tmp24;
177   r6 = tmp6 - tmp24;
178   r7 = tmp16 + tmp18;
179   r8 = tmp16 - tmp18;
180   r9 = tmp20 - tmp22;
181   r10 = tmp20 + tmp22;
182   r11 = r1 + tmp8;
183   r12 = r2 + tmp10;
184   r13 = r11 - tmp12;
185   r14 = r12 - tmp14;
186   r15 = r12 + tmp14;
187   r16 = r11 + tmp12;
188 
189   /* Post-additions imaginary part */
190   tmp3 = i0 + i1;
191   tmp5 = i3 + i6;
192   tmp7 = i3 - i6;
193   tmp9 = i4 + i5;
194   tmp11 = i4 - i5;
195   tmp13 = i7 + i8;
196   tmp15 = i7 - i8;
197   tmp17 = i13 + i16;
198   tmp19 = i14 + i17;
199   tmp21 = i10 - i13;
200   tmp23 = i11 - i14;
201   tmp25 = i12 + i15;
202   tmp27 = i12 - i9;
203 
204   i1 = tmp3 + i2;
205   i2 = tmp3 - i2;
206   i3 = tmp5 + tmp27;
207   i4 = tmp5 - tmp27;
208   i5 = tmp7 + tmp25;
209   i6 = tmp7 - tmp25;
210   i7 = tmp17 + tmp19;
211   i8 = tmp17 - tmp19;
212   i9 = tmp21 - tmp23;
213   i10 = tmp21 + tmp23;
214   i11 = i1 + tmp9;
215   i12 = i2 + tmp11;
216   i13 = i11 - tmp13;
217   i14 = i12 - tmp15;
218   i15 = i12 + tmp15;
219   i16 = i11 + tmp13;
220 
221   *ptr_vec++ = r0;
222   *ptr_vec++ = i0;
223   *ptr_vec++ = r13 + r5 + r7;
224   *ptr_vec++ = i13 + i5 + i7;
225   *ptr_vec++ = r15 + r3 - r9;
226   *ptr_vec++ = i15 + i3 - i9;
227   *ptr_vec++ = r0 + r4;
228   *ptr_vec++ = i0 + i4;
229   *ptr_vec++ = r13 + r6 - r7;
230   *ptr_vec++ = i13 + i6 - i7;
231   *ptr_vec++ = r2;
232   *ptr_vec++ = i2;
233   *ptr_vec++ = r0 + r5;
234   *ptr_vec++ = i0 + i5;
235   *ptr_vec++ = r16 + r3 - r10;
236   *ptr_vec++ = i16 + i3 - i10;
237   *ptr_vec++ = r15 + r4 + r9;
238   *ptr_vec++ = i15 + i4 + i9;
239   *ptr_vec++ = r0 + r6;
240   *ptr_vec++ = i0 + i6;
241   *ptr_vec++ = r1;
242   *ptr_vec++ = i1;
243   *ptr_vec++ = r14 + r5 + r8;
244   *ptr_vec++ = i14 + i5 + i8;
245   *ptr_vec++ = r0 + r3;
246   *ptr_vec++ = i0 + i3;
247   *ptr_vec++ = r16 + r4 + r10;
248   *ptr_vec++ = i16 + i4 + i10;
249   *ptr_vec++ = r14 + r6 - r8;
250   *ptr_vec++ = i14 + i6 - i8;
251 }
252 
ia_enhaacplus_enc_fft16(FLOAT32 * ptr_vec)253 static VOID ia_enhaacplus_enc_fft16(FLOAT32 *ptr_vec) {
254   FLOAT32 var10, var11, var12, var13, var14, var15, var16, var17, var18, var19, var110, var111,
255       var112, var113, var114, var115, var20, var21, var22, var23, var24, var25, var26, var27,
256       var28, var29, var210, var211, var212, var213, var214, var215, arr0, arr1, arr2, arr3, arr4,
257       arr5, arr6, arr7, arr8, arr9, arr10, arr11, arr12, arr13, arr14, arr15;
258 
259   /* Pre-additions */
260   arr0 = ptr_vec[0] + ptr_vec[16];
261   arr8 = ptr_vec[8] + ptr_vec[24];
262   var10 = arr0 + arr8;
263   var12 = arr0 - arr8;
264   arr1 = ptr_vec[1] + ptr_vec[17];
265   arr9 = ptr_vec[9] + ptr_vec[25];
266   var11 = arr1 + arr9;
267   var13 = arr1 - arr9;
268   arr2 = ptr_vec[2] + ptr_vec[18];
269   arr10 = ptr_vec[10] + ptr_vec[26];
270   var14 = arr2 + arr10;
271   var16 = arr2 - arr10;
272   arr3 = ptr_vec[3] + ptr_vec[19];
273   arr11 = ptr_vec[11] + ptr_vec[27];
274   var15 = arr3 + arr11;
275   var17 = arr3 - arr11;
276   arr4 = ptr_vec[4] + ptr_vec[20];
277   arr12 = ptr_vec[12] + ptr_vec[28];
278   var18 = arr4 + arr12;
279   var110 = arr4 - arr12;
280   arr5 = ptr_vec[5] + ptr_vec[21];
281   arr13 = ptr_vec[13] + ptr_vec[29];
282   var19 = arr5 + arr13;
283   var111 = arr5 - arr13;
284   arr6 = ptr_vec[6] + ptr_vec[22];
285   arr14 = ptr_vec[14] + ptr_vec[30];
286   var112 = arr6 + arr14;
287   var114 = arr6 - arr14;
288   arr7 = ptr_vec[7] + ptr_vec[23];
289   arr15 = ptr_vec[15] + ptr_vec[31];
290   var113 = arr7 + arr15;
291   var115 = arr7 - arr15;
292 
293   /* Pre-additions and core multiplications */
294   var20 = var10 + var18;
295   var24 = var10 - var18;
296   var21 = var11 + var19;
297   var25 = var11 - var19;
298   var28 = var12 - var111;
299   var210 = var12 + var111;
300   var29 = var13 + var110;
301   var211 = var13 - var110;
302   var22 = var14 + var112;
303   var27 = var14 - var112;
304   var23 = var15 + var113;
305   var26 = var113 - var15;
306 
307   var11 = var16 + var114;
308   var12 = var16 - var114;
309   var10 = var17 + var115;
310   var13 = var17 - var115;
311   var212 = (var10 + var12) * IXHEAACE_INV_SQRT2;
312   var214 = (var10 - var12) * IXHEAACE_INV_SQRT2;
313   var213 = (var13 - var11) * IXHEAACE_INV_SQRT2;
314   var215 = (var11 + var13) * -IXHEAACE_INV_SQRT2;
315 
316   /* odd */
317   arr0 = ptr_vec[0] - ptr_vec[16];
318   arr1 = ptr_vec[1] - ptr_vec[17];
319   arr2 = ptr_vec[2] - ptr_vec[18];
320   arr3 = ptr_vec[3] - ptr_vec[19];
321   arr4 = ptr_vec[4] - ptr_vec[20];
322   arr5 = ptr_vec[5] - ptr_vec[21];
323   arr6 = ptr_vec[6] - ptr_vec[22];
324   arr7 = ptr_vec[7] - ptr_vec[23];
325   arr8 = ptr_vec[8] - ptr_vec[24];
326   arr9 = ptr_vec[9] - ptr_vec[25];
327   arr10 = ptr_vec[10] - ptr_vec[26];
328   arr11 = ptr_vec[11] - ptr_vec[27];
329   arr12 = ptr_vec[12] - ptr_vec[28];
330   arr13 = ptr_vec[13] - ptr_vec[29];
331   arr14 = ptr_vec[14] - ptr_vec[30];
332   arr15 = ptr_vec[15] - ptr_vec[31];
333 
334   /* Pre-additions and core multiplications */
335   var19 = (arr2 + arr14) * -IXHEAACE_COS_3PI_DIV8;
336   var110 = (arr2 - arr14) * IXHEAACE_COS_PI_DIV8;
337   var18 = (arr3 + arr15) * IXHEAACE_COS_3PI_DIV8;
338   var111 = (arr3 - arr15) * IXHEAACE_COS_PI_DIV8;
339   var15 = (arr4 + arr12) * -IXHEAACE_INV_SQRT2;
340   var16 = (arr4 - arr12) * IXHEAACE_INV_SQRT2;
341   var14 = (arr5 + arr13) * IXHEAACE_INV_SQRT2;
342   var17 = (arr5 - arr13) * IXHEAACE_INV_SQRT2;
343   var113 = (arr6 + arr10) * -IXHEAACE_COS_PI_DIV8;
344   var114 = (arr6 - arr10) * IXHEAACE_COS_3PI_DIV8;
345   var112 = (arr7 + arr11) * IXHEAACE_COS_PI_DIV8;
346   var115 = (arr7 - arr11) * IXHEAACE_COS_3PI_DIV8;
347 
348   /* Core multiplications */
349   arr2 = var18 * IXHEAACE_SQRT2PLUS1 - var112 * IXHEAACE_SQRT2MINUS1;
350   arr3 = var19 * IXHEAACE_SQRT2PLUS1 - var113 * IXHEAACE_SQRT2MINUS1;
351   arr4 = var110 * IXHEAACE_SQRT2MINUS1 - var114 * IXHEAACE_SQRT2PLUS1;
352   arr5 = var111 * IXHEAACE_SQRT2MINUS1 - var115 * IXHEAACE_SQRT2PLUS1;
353 
354   /* Post-additions */
355   var18 = var18 + var112;
356   var19 = var19 + var113;
357   var110 = var110 + var114;
358   var111 = var111 + var115;
359   arr6 = arr0 + var14;
360   arr10 = arr0 - var14;
361   arr7 = arr1 + var15;
362   arr11 = arr1 - var15;
363 
364   arr12 = var16 - arr9;
365   arr14 = var16 + arr9;
366   arr13 = arr8 + var17;
367   arr15 = arr8 - var17;
368 
369   var10 = arr6 - arr14;
370   var12 = arr6 + arr14;
371   var11 = arr7 + arr15;
372   var13 = arr7 - arr15;
373   var14 = arr10 + arr12;
374   var16 = arr10 - arr12;
375   var15 = arr11 + arr13;
376   var17 = arr11 - arr13;
377 
378   arr10 = var18 + var110;
379   var110 = var18 - var110;
380   arr11 = var19 + var111;
381   var111 = var19 - var111;
382 
383   var112 = arr2 + arr4;
384   var114 = arr2 - arr4;
385   var113 = arr3 + arr5;
386   var115 = arr3 - arr5;
387 
388   /* Post-additions */
389   ptr_vec[0] = var20 + var22;
390   ptr_vec[1] = var21 + var23;
391   ptr_vec[2] = var12 + arr10;
392   ptr_vec[3] = var13 + arr11;
393   ptr_vec[4] = var210 + var212;
394   ptr_vec[5] = var211 + var213;
395   ptr_vec[6] = var10 + var112;
396   ptr_vec[7] = var11 + var113;
397   ptr_vec[8] = var24 - var26;
398   ptr_vec[9] = var25 - var27;
399   ptr_vec[10] = var16 + var114;
400   ptr_vec[11] = var17 + var115;
401   ptr_vec[12] = var28 + var214;
402   ptr_vec[13] = var29 + var215;
403   ptr_vec[14] = var14 + var110;
404   ptr_vec[15] = var15 + var111;
405   ptr_vec[16] = var20 - var22;
406   ptr_vec[17] = var21 - var23;
407   ptr_vec[18] = var12 - arr10;
408   ptr_vec[19] = var13 - arr11;
409   ptr_vec[20] = var210 - var212;
410   ptr_vec[21] = var211 - var213;
411   ptr_vec[22] = var10 - var112;
412   ptr_vec[23] = var11 - var113;
413   ptr_vec[24] = var24 + var26;
414   ptr_vec[25] = var25 + var27;
415   ptr_vec[26] = var16 - var114;
416   ptr_vec[27] = var17 - var115;
417   ptr_vec[28] = var28 - var214;
418   ptr_vec[29] = var29 - var215;
419   ptr_vec[30] = var14 - var110;
420   ptr_vec[31] = var15 - var111;
421 }
422 
ia_enhaacplus_enc_fft240(FLOAT32 * ptr_in)423 static VOID ia_enhaacplus_enc_fft240(FLOAT32 *ptr_in) {
424   const WORD32 n1 = 240;
425   const WORD32 n2 = 15;
426   const WORD32 n3 = 16;
427   const WORD32 *ptr_idx1 = ia_enhaacplus_enc_fft240_table1;
428   const WORD32 *ptr_idx2 = ia_enhaacplus_enc_fft240_table2;
429 
430   WORD32 k, l;
431   FLOAT32 temp[32], out[480];
432 
433   for (k = 0; k < n2; k++) {
434     for (l = 0; l < n3; l++) {
435       temp[2 * l] = ptr_in[2 * *ptr_idx1];
436       temp[2 * l + 1] = ptr_in[2 * *ptr_idx1 + 1];
437       ptr_idx1 += n2;
438     }
439 
440     ia_enhaacplus_enc_fft16(temp); /* 16-point FFT */
441     ptr_idx1 -= n1;
442 
443     for (l = 0; l < n3; l++) {
444       ptr_in[2 * *ptr_idx1] = temp[2 * l];
445       ptr_in[2 * *ptr_idx1 + 1] = temp[2 * l + 1];
446       ptr_idx1 += n2;
447     }
448 
449     ptr_idx1 -= n1 - 1;
450   }
451 
452   ptr_idx1 -= n2;
453 
454   for (k = 0; k < n3; k++) {
455     for (l = 0; l < n2; l++) {
456       temp[2 * l] = ptr_in[2 * *ptr_idx1];
457       temp[2 * l + 1] = ptr_in[2 * *ptr_idx1++ + 1];
458     }
459 
460     ia_enhaacplus_enc_fft15(temp); /* 15-point FFT */
461 
462     for (l = 0; l < n2; l++) {
463       out[2 * *ptr_idx2] = temp[2 * l];
464       out[2 * *ptr_idx2++ + 1] = temp[2 * l + 1];
465     }
466   }
467 
468   memcpy(ptr_in, out, (2 * n1) * sizeof(out[0]));
469 }
470 
ia_aac_ld_enc_mdct_480(FLOAT32 * ptr_inp,FLOAT32 * ptr_scratch,WORD32 mdct_flag,ixheaace_mdct_tables * pstr_mdct_tables)471 VOID ia_aac_ld_enc_mdct_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_scratch, WORD32 mdct_flag,
472                             ixheaace_mdct_tables *pstr_mdct_tables) {
473   WORD32 k;
474   FLOAT32 const_mltfac = ((FLOAT32)FRAME_LEN_512) / FRAME_LEN_480;
475 
476   ia_eaacp_enc_pre_twiddle_aac(ptr_scratch, ptr_inp, FRAME_LEN_480,
477                                pstr_mdct_tables->cosine_array_960);
478 
479   ia_enhaacplus_enc_fft240(ptr_scratch);
480 
481   ia_enhaacplus_enc_post_twiddle(ptr_inp, ptr_scratch, pstr_mdct_tables->cosine_array_960,
482                                  FRAME_LEN_480);
483 
484   if (0 == mdct_flag) {
485     for (k = 0; k < MDCT_LEN; k++) {
486       ptr_inp[k] *= const_mltfac;
487     }
488   }
489 }
490