xref: /btstack/3rd-party/lc3-google/src/ltpf_arm.h (revision da8e14c5aa3783b6bb7dd63e71572a901bcf168b)
1 /******************************************************************************
2  *
3  *  Copyright 2022 Google LLC
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 #if __ARM_FEATURE_SIMD32 && !(__GNUC__ < 10)
20 
21 #ifndef TEST_ARM
22 
23 #include <arm_acle.h>
24 
25 static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
26 {
27     int16x2_t r;
28     __asm("pkhbt %0, %1, %2" : "=r" (r) : "r" (a), "r" (b));
29     return r;
30 }
31 
32 #endif /* TEST_ARM */
33 
34 
/**
 * Import
 *
 * Both helpers are only declared here, their definitions have to be
 * provided by the translation unit including this file.
 */

static inline int32_t filter_hp50(
    struct lc3_ltpf_hp50_state *hp50, int32_t xn);

static inline float dot(const int16_t *a, const int16_t *b, int n);
41 
42 
43 /**
44  * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
45  */
46 #if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
47     || !defined(resample_32k_12k8)
48 static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
49     struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
50 {
51     const int w = 40 / p;
52 
53     x -= w;
54 
55     for (int i = 0; i < 5*n; i += 5) {
56         const int16x2_t *hn = h + (i % (2*p)) * (48 / p);
57         const int16x2_t *xn = x + (i / (2*p));
58 
59         int32_t un = __smlad(*(xn++), *(hn++), 0);
60 
61         for (int k = 0; k < w; k += 5) {
62             un = __smlad(*(xn++), *(hn++), un);
63             un = __smlad(*(xn++), *(hn++), un);
64             un = __smlad(*(xn++), *(hn++), un);
65             un = __smlad(*(xn++), *(hn++), un);
66             un = __smlad(*(xn++), *(hn++), un);
67         }
68 
69         int32_t yn = filter_hp50(hp50, un);
70         *(y++) = (yn + (1 << 15)) >> 16;
71     }
72 }
73 #endif
74 
75 /**
76  * Resample from 24 / 48 KHz to 12.8 KHz Template
77  */
78 #if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
79 static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
80     struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
81 {
82     const int w = 120 / p;
83 
84     x -= w;
85 
86     for (int i = 0; i < 15*n; i += 15) {
87         const int16x2_t *hn = h + (i % (2*p)) * (128 / p);
88         const int16x2_t *xn = x + (i / (2*p));
89 
90         int32_t un = __smlad(*(xn++), *(hn++), 0);
91 
92         for (int k = 0; k < w; k += 15) {
93             un = __smlad(*(xn++), *(hn++), un);
94             un = __smlad(*(xn++), *(hn++), un);
95             un = __smlad(*(xn++), *(hn++), un);
96             un = __smlad(*(xn++), *(hn++), un);
97             un = __smlad(*(xn++), *(hn++), un);
98             un = __smlad(*(xn++), *(hn++), un);
99             un = __smlad(*(xn++), *(hn++), un);
100             un = __smlad(*(xn++), *(hn++), un);
101             un = __smlad(*(xn++), *(hn++), un);
102             un = __smlad(*(xn++), *(hn++), un);
103             un = __smlad(*(xn++), *(hn++), un);
104             un = __smlad(*(xn++), *(hn++), un);
105             un = __smlad(*(xn++), *(hn++), un);
106             un = __smlad(*(xn++), *(hn++), un);
107             un = __smlad(*(xn++), *(hn++), un);
108         }
109 
110         int32_t yn = filter_hp50(hp50, un);
111         *(y++) = (yn + (1 << 15)) >> 16;
112     }
113 }
114 #endif
115 
116 /**
117  * Resample from 8 Khz to 12.8 KHz
118  */
119 #ifndef resample_8k_12k8
120 #define resample_8k_12k8 arm_resample_8k_12k8
121 static void arm_resample_8k_12k8(
122     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
123 {
124     static const int16_t alignas(int32_t) h[2*8*12] = {
125         0, 214,  417, -1052, -4529, 26233, -4529, -1052,   417,  214,   0, 0,
126         0, 180,    0, -1522, -2427, 24506, -5289,     0,   763,  156, -28, 0,
127         0,  92, -323, -1361,     0, 19741, -3885,  1317,   861,    0, -61, 0,
128         0,   0, -457,  -752,  1873, 13068,     0,  2389,   598, -213, -79, 0,
129         0, -61, -398,     0,  2686,  5997,  5997,  2686,     0, -398, -61, 0,
130         0, -79, -213,   598,  2389,     0, 13068,  1873,  -752, -457,   0, 0,
131         0, -61,    0,   861,  1317, -3885, 19741,     0, -1361, -323,  92, 0,
132         0, -28,  156,   763,     0, -5289, 24506, -2427, -1522,    0, 180, 0,
133         0, 0, 214,  417, -1052, -4529, 26233, -4529, -1052,   417,  214,   0,
134         0, 0, 180,    0, -1522, -2427, 24506, -5289,     0,   763,  156, -28,
135         0, 0,  92, -323, -1361,     0, 19741, -3885,  1317,   861,    0, -61,
136         0, 0,   0, -457,  -752,  1873, 13068,     0,  2389,   598, -213, -79,
137         0, 0, -61, -398,     0,  2686,  5997,  5997,  2686,     0, -398, -61,
138         0, 0, -79, -213,   598,  2389,     0, 13068,  1873,  -752, -457,   0,
139         0, 0, -61,    0,   861,  1317, -3885, 19741,     0, -1361, -323,  92,
140         0, 0, -28,  156,   763,     0, -5289, 24506, -2427, -1522,    0, 180,
141     };
142 
143     arm_resample_x64k_12k8(
144         8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
145 }
146 #endif /* resample_8k_12k8 */
147 
148 /**
149  * Resample from 16 Khz to 12.8 KHz
150  */
151 #ifndef resample_16k_12k8
152 #define resample_16k_12k8 arm_resample_16k_12k8
153 static void arm_resample_16k_12k8(
154     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
155 {
156     static const int16_t alignas(int32_t) h[2*4*24] = {
157 
158             0,   -61,   214,  -398,   417,     0, -1052,  2686,
159         -4529,  5997, 26233,  5997, -4529,  2686, -1052,     0,
160           417,  -398,   214,   -61,     0,     0,     0,     0,
161 
162 
163             0,   -79,   180,  -213,     0,   598, -1522,  2389,
164         -2427,     0, 24506, 13068, -5289,  1873,     0,  -752,
165           763,  -457,   156,     0,   -28,     0,     0,     0,
166 
167 
168             0,   -61,    92,     0,  -323,   861, -1361,  1317,
169             0, -3885, 19741, 19741, -3885,     0,  1317, -1361,
170           861,  -323,     0,    92,   -61,     0,     0,     0,
171 
172             0,   -28,     0,   156,  -457,   763,  -752,     0,
173          1873, -5289, 13068, 24506,     0, -2427,  2389, -1522,
174           598,     0,  -213,   180,   -79,     0,     0,     0,
175 
176 
177             0,     0,   -61,   214,  -398,   417,     0, -1052,
178          2686, -4529,  5997, 26233,  5997, -4529,  2686, -1052,
179             0,   417,  -398,   214,   -61,     0,     0,     0,
180 
181 
182             0,     0,   -79,   180,  -213,     0,   598, -1522,
183          2389, -2427,     0, 24506, 13068, -5289,  1873,     0,
184          -752,   763,  -457,   156,     0,   -28,     0,     0,
185 
186 
187             0,     0,   -61,    92,     0,  -323,   861, -1361,
188          1317,     0, -3885, 19741, 19741, -3885,     0,  1317,
189         -1361,   861,  -323,     0,    92,   -61,     0,     0,
190 
191             0,     0,   -28,     0,   156,  -457,   763,  -752,
192             0,  1873, -5289, 13068, 24506,     0, -2427,  2389,
193         -1522,   598,     0,  -213,   180,   -79,     0,     0,
194     };
195 
196     arm_resample_x64k_12k8(
197         4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
198 }
199 #endif /* resample_16k_12k8 */
200 
201 /**
202  * Resample from 32 Khz to 12.8 KHz
203  */
204 #ifndef resample_32k_12k8
205 #define resample_32k_12k8 arm_resample_32k_12k8
206 static void arm_resample_32k_12k8(
207     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
208 {
209     static const int16_t alignas(int32_t) h[2*2*48] = {
210 
211             0,   -30,   -31,    46,   107,     0,  -199,  -162,
212           209,   430,     0,  -681,  -526,   658,  1343,     0,
213         -2264, -1943,  2999,  9871, 13116,  9871,  2999, -1943,
214         -2264,     0,  1343,   658,  -526,  -681,     0,   430,
215           209,  -162,  -199,     0,   107,    46,   -31,   -30,
216             0,     0,     0,     0,     0,     0,     0,     0,
217 
218             0,   -14,   -39,     0,    90,    78,  -106,  -229,
219             0,   382,   299,  -376,  -761,     0,  1194,   937,
220         -1214, -2644,     0,  6534, 12253, 12253,  6534,     0,
221         -2644, -1214,   937,  1194,     0,  -761,  -376,   299,
222           382,     0,  -229,  -106,    78,    90,     0,   -39,
223           -14,     0,     0,     0,     0,     0,     0,     0,
224 
225             0,     0,   -30,   -31,    46,   107,     0,  -199,
226          -162,   209,   430,     0,  -681,  -526,   658,  1343,
227             0, -2264, -1943,  2999,  9871, 13116,  9871,  2999,
228         -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
229           430,   209,  -162,  -199,     0,   107,    46,   -31,
230           -30,     0,     0,     0,     0,     0,     0,     0,
231 
232             0,     0,   -14,   -39,     0,    90,    78,  -106,
233          -229,     0,   382,   299,  -376,  -761,     0,  1194,
234           937, -1214, -2644,     0,  6534, 12253, 12253,  6534,
235             0, -2644, -1214,   937,  1194,     0,  -761,  -376,
236           299,   382,     0,  -229,  -106,    78,    90,     0,
237           -39,   -14,     0,     0,     0,     0,     0,     0,
238     };
239 
240     arm_resample_x64k_12k8(
241         2, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
242 }
243 #endif /* resample_32k_12k8 */
244 
245 /**
246  * Resample from 24 Khz to 12.8 KHz
247  */
248 #ifndef resample_24k_12k8
249 #define resample_24k_12k8 arm_resample_24k_12k8
250 static void arm_resample_24k_12k8(
251     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
252 {
253     static const int16_t alignas(int32_t) h[2*8*32] = {
254 
255             0,   -50,    19,   143,   -93,  -290,   278,   485,
256          -658,  -701,  1396,   901, -3019, -1042, 10276, 17488,
257         10276, -1042, -3019,   901,  1396,  -701,  -658,   485,
258           278,  -290,   -93,   143,    19,   -50,     0,     0,
259 
260             0,   -46,     0,   141,   -45,  -305,   185,   543,
261          -501,  -854,  1153,  1249, -2619, -1908,  8712, 17358,
262         11772,     0, -3319,   480,  1593,  -504,  -796,   399,
263           367,  -261,  -142,   138,    40,   -52,    -5,     0,
264 
265             0,   -41,   -17,   133,     0,  -304,    91,   574,
266          -334,  -959,   878,  1516, -2143, -2590,  7118, 16971,
267         13161,  1202, -3495,     0,  1731,  -267,  -908,   287,
268           445,  -215,  -188,   125,    62,   -52,   -12,     0,
269 
270             0,   -34,   -30,   120,    41,  -291,     0,   577,
271          -164, -1015,   585,  1697, -1618, -3084,  5534, 16337,
272         14406,  2544, -3526,  -523,  1800,     0,  -985,   152,
273           509,  -156,  -230,   104,    83,   -48,   -19,     0,
274 
275             0,   -26,   -41,   103,    76,  -265,   -83,   554,
276             0, -1023,   288,  1791, -1070, -3393,  3998, 15474,
277         15474,  3998, -3393, -1070,  1791,   288, -1023,     0,
278           554,   -83,  -265,    76,   103,   -41,   -26,     0,
279 
280             0,   -19,   -48,    83,   104,  -230,  -156,   509,
281           152,  -985,     0,  1800,  -523, -3526,  2544, 14406,
282         16337,  5534, -3084, -1618,  1697,   585, -1015,  -164,
283           577,     0,  -291,    41,   120,   -30,   -34,     0,
284 
285             0,   -12,   -52,    62,   125,  -188,  -215,   445,
286           287,  -908,  -267,  1731,     0, -3495,  1202, 13161,
287         16971,  7118, -2590, -2143,  1516,   878,  -959,  -334,
288           574,    91,  -304,     0,   133,   -17,   -41,     0,
289 
290             0,    -5,   -52,    40,   138,  -142,  -261,   367,
291           399,  -796,  -504,  1593,   480, -3319,     0, 11772,
292         17358,  8712, -1908, -2619,  1249,  1153,  -854,  -501,
293           543,   185,  -305,   -45,   141,     0,   -46,     0,
294 
295             0,     0,   -50,    19,   143,   -93,  -290,   278,
296           485,  -658,  -701,  1396,   901, -3019, -1042, 10276,
297         17488, 10276, -1042, -3019,   901,  1396,  -701,  -658,
298           485,   278,  -290,   -93,   143,    19,   -50,     0,
299 
300             0,     0,   -46,     0,   141,   -45,  -305,   185,
301           543,  -501,  -854,  1153,  1249, -2619, -1908,  8712,
302         17358, 11772,     0, -3319,   480,  1593,  -504,  -796,
303           399,   367,  -261,  -142,   138,    40,   -52,    -5,
304 
305             0,     0,   -41,   -17,   133,     0,  -304,    91,
306           574,  -334,  -959,   878,  1516, -2143, -2590,  7118,
307         16971, 13161,  1202, -3495,     0,  1731,  -267,  -908,
308           287,   445,  -215,  -188,   125,    62,   -52,   -12,
309 
310             0,     0,   -34,   -30,   120,    41,  -291,     0,
311           577,  -164, -1015,   585,  1697, -1618, -3084,  5534,
312         16337, 14406,  2544, -3526,  -523,  1800,     0,  -985,
313           152,   509,  -156,  -230,   104,    83,   -48,   -19,
314 
315             0,     0,   -26,   -41,   103,    76,  -265,   -83,
316           554,     0, -1023,   288,  1791, -1070, -3393,  3998,
317         15474, 15474,  3998, -3393, -1070,  1791,   288, -1023,
318             0,   554,   -83,  -265,    76,   103,   -41,   -26,
319 
320             0,     0,   -19,   -48,    83,   104,  -230,  -156,
321           509,   152,  -985,     0,  1800,  -523, -3526,  2544,
322         14406, 16337,  5534, -3084, -1618,  1697,   585, -1015,
323          -164,   577,     0,  -291,    41,   120,   -30,   -34,
324 
325             0,     0,   -12,   -52,    62,   125,  -188,  -215,
326           445,   287,  -908,  -267,  1731,     0, -3495,  1202,
327         13161, 16971,  7118, -2590, -2143,  1516,   878,  -959,
328          -334,   574,    91,  -304,     0,   133,   -17,   -41,
329 
330             0,     0,    -5,   -52,    40,   138,  -142,  -261,
331           367,   399,  -796,  -504,  1593,   480, -3319,     0,
332         11772, 17358,  8712, -1908, -2619,  1249,  1153,  -854,
333          -501,   543,   185,  -305,   -45,   141,     0,   -46,
334     };
335 
336     arm_resample_x192k_12k8(
337         8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
338 }
339 #endif /* resample_24k_12k8 */
340 
341 /**
342  * Resample from 48 Khz to 12.8 KHz
343  */
344 #ifndef resample_48k_12k8
345 #define resample_48k_12k8 arm_resample_48k_12k8
346 static void arm_resample_48k_12k8(
347     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
348 {
349     static const int16_t alignas(int32_t) h[2*4*64] = {
350 
351             0,   -13,   -25,   -20,    10,    51,    71,    38,
352           -47,  -133,  -145,   -42,   139,   277,   242,     0,
353          -329,  -511,  -351,   144,   698,   895,   450,  -535,
354         -1510, -1697,  -521,  1999,  5138,  7737,  8744,  7737,
355          5138,  1999,  -521, -1697, -1510,  -535,   450,   895,
356           698,   144,  -351,  -511,  -329,     0,   242,   277,
357           139,   -42,  -145,  -133,   -47,    38,    71,    51,
358            10,   -20,   -25,   -13,     0,     0,     0,     0,
359 
360             0,    -9,   -23,   -24,     0,    41,    71,    52,
361           -23,  -115,  -152,   -78,    92,   254,   272,    76,
362          -251,  -493,  -427,     0,   576,   900,   624,  -262,
363         -1309, -1763,  -954,  1272,  4356,  7203,  8679,  8169,
364          5886,  2767,     0, -1542, -1660,  -809,   240,   848,
365           796,   292,  -252,  -507,  -398,   -82,   199,   288,
366           183,     0,  -130,  -145,   -71,    20,    69,    60,
367            20,   -15,   -26,   -17,    -3,     0,     0,     0,
368 
369             0,    -6,   -20,   -26,    -8,    31,    67,    62,
370             0,   -94,  -152,  -108,    45,   223,   287,   143,
371          -167,  -454,  -480,  -134,   439,   866,   758,     0,
372         -1071, -1748, -1295,   601,  3559,  6580,  8485,  8485,
373          6580,  3559,   601, -1295, -1748, -1071,     0,   758,
374           866,   439,  -134,  -480,  -454,  -167,   143,   287,
375           223,    45,  -108,  -152,   -94,     0,    62,    67,
376            31,    -8,   -26,   -20,    -6,     0,     0,     0,
377 
378             0,    -3,   -17,   -26,   -15,    20,    60,    69,
379            20,   -71,  -145,  -130,     0,   183,   288,   199,
380           -82,  -398,  -507,  -252,   292,   796,   848,   240,
381          -809, -1660, -1542,     0,  2767,  5886,  8169,  8679,
382          7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,
383           900,   576,     0,  -427,  -493,  -251,    76,   272,
384           254,    92,   -78,  -152,  -115,   -23,    52,    71,
385            41,     0,   -24,   -23,    -9,     0,     0,     0,
386 
387             0,     0,   -13,   -25,   -20,    10,    51,    71,
388            38,   -47,  -133,  -145,   -42,   139,   277,   242,
389             0,  -329,  -511,  -351,   144,   698,   895,   450,
390          -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
391          7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,
392           895,   698,   144,  -351,  -511,  -329,     0,   242,
393           277,   139,   -42,  -145,  -133,   -47,    38,    71,
394            51,    10,   -20,   -25,   -13,     0,     0,     0,
395 
396             0,     0,    -9,   -23,   -24,     0,    41,    71,
397            52,   -23,  -115,  -152,   -78,    92,   254,   272,
398            76,  -251,  -493,  -427,     0,   576,   900,   624,
399          -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
400          8169,  5886,  2767,     0, -1542, -1660,  -809,   240,
401           848,   796,   292,  -252,  -507,  -398,   -82,   199,
402           288,   183,     0,  -130,  -145,   -71,    20,    69,
403            60,    20,   -15,   -26,   -17,    -3,     0,     0,
404 
405             0,     0,    -6,   -20,   -26,    -8,    31,    67,
406            62,     0,   -94,  -152,  -108,    45,   223,   287,
407           143,  -167,  -454,  -480,  -134,   439,   866,   758,
408             0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
409          8485,  6580,  3559,   601, -1295, -1748, -1071,     0,
410           758,   866,   439,  -134,  -480,  -454,  -167,   143,
411           287,   223,    45,  -108,  -152,   -94,     0,    62,
412            67,    31,    -8,   -26,   -20,    -6,     0,     0,
413 
414             0,     0,    -3,   -17,   -26,   -15,    20,    60,
415            69,    20,   -71,  -145,  -130,     0,   183,   288,
416           199,   -82,  -398,  -507,  -252,   292,   796,   848,
417           240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
418          8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,
419           624,   900,   576,     0,  -427,  -493,  -251,    76,
420           272,   254,    92,   -78,  -152,  -115,   -23,    52,
421            71,    41,     0,   -24,   -23,    -9,     0,     0,
422     };
423 
424     arm_resample_x192k_12k8(
425         4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
426 }
427 #endif /* resample_48k_12k8 */
428 
429 /**
430  * Return vector of correlations
431  */
432 #ifndef correlate
433 #define correlate arm_correlate
434 static void arm_correlate(
435     const int16_t *a, const int16_t *b, int n, float *y, int nc)
436 {
437     /* --- Check alignment of `b` --- */
438 
439     if ((uintptr_t)b & 3)
440         *(y++) = dot(a, b--, n), nc--;
441 
442     /* --- Processing by pair --- */
443 
444     for ( ; nc >= 2; nc -= 2) {
445         const int16x2_t *an = (const int16x2_t *)(a  );
446         const int16x2_t *bn = (const int16x2_t *)(b--);
447 
448         int16x2_t ax, b0, b1;
449         int64_t v0 = 0, v1 = 0;
450 
451         b1 = (int16x2_t)*(b--) << 16;
452 
453         for (int i = 0; i < (n >> 4); i++ )
454             for (int j = 0; j < 4; j++) {
455 
456                 ax = *(an++), b0 = *(bn++);
457                 v0 = __smlald (ax, b0, v0);
458                 v1 = __smlaldx(ax, __pkhbt(b0, b1), v1);
459 
460                 ax = *(an++), b1 = *(bn++);
461                 v0 = __smlald (ax, b1, v0);
462                 v1 = __smlaldx(ax, __pkhbt(b1, b0), v1);
463             }
464 
465         *(y++) = (float)((int32_t)((v0 + (1 << 5)) >> 6));
466         *(y++) = (float)((int32_t)((v1 + (1 << 5)) >> 6));
467     }
468 
469     /* --- Odd element count --- */
470 
471     if (nc > 0)
472         *(y++) = dot(a, b, n);
473 }
474 #endif /* correlate */
475 
476 #endif /* __ARM_FEATURE_SIMD32 */
477