/******************************************************************************
 *
 *  Copyright 2022 Google LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at:
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

#if __ARM_FEATURE_SIMD32

#ifndef TEST_ARM

#include <arm_acle.h>

/**
 * Thin wrapper around the `PKHBT` instruction
 * a, b            Packed pairs of 16-bit values
 * return          Result register of `pkhbt a, b`; per the ARM instruction
 *                 reference this is the bottom halfword of `a` combined
 *                 with the top halfword of `b`
 *
 * Only defined when `TEST_ARM` is not set; otherwise the definition is
 * expected to come from elsewhere.
 */
static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
{
    int16x2_t r;
    __asm("pkhbt %0, %1, %2" : "=r" (r) : "r" (a), "r" (b));
    return r;
}

#endif /* TEST_ARM */


/**
 * Import
 *
 * Helpers defined outside of this section; only their prototypes are
 * visible here.
 */

static inline int32_t filter_hp50(struct lc3_ltpf_hp50_state *, int32_t);
static inline float dot(const int16_t *, const int16_t *, int);


/**
 * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
 * p               Table geometry factor; the wrappers of this file pass
 *                 8, 4 and 2 for respectively 8, 16 and 32 KHz
 * h               Coefficients, as `2*p` rows of `48 / p` packed pairs
 * hp50            State handed to `filter_hp50()` for each output sample
 * x               Input samples as packed pairs; the `40 / p` pairs located
 *                 before `x` are read, the caller must provide them
 * y, n            Output buffer, and number of samples to produce
 *
 * Each output accumulates `1 + 40 / p` dual 16-bit products with `__smlad`,
 * goes through `filter_hp50()`, and is rounded down to 16 bits.
 */
#if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
    || !defined(resample_32k_12k8)
static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{
    const int w = 40 / p;

    /* Step back over the input history, counted in packed pairs */
    x -= w;

    for (int i = 0; i < 5*n; i += 5) {
        /* Row of coefficients, and first input pair, of this output */
        const int16x2_t *hn = h + (i % (2*p)) * (48 / p);
        const int16x2_t *xn = x + (i / (2*p));

        int32_t un = __smlad(*(xn++), *(hn++), 0);

        for (int k = 0; k < w; k += 5) {
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
        }

        /* High-pass filtering, then rounding of the 16 low bits */
        int32_t yn = filter_hp50(hp50, un);
        *(y++) = (yn + (1 << 15)) >> 16;
    }
}
#endif

/**
 * Resample from 24 / 48 KHz to 12.8 KHz Template
 * p               Table geometry factor; the wrappers of this file pass
 *                 8 and 4 for respectively 24 and 48 KHz
 * h               Coefficients, as `2*p` rows of `128 / p` packed pairs
 * hp50            State handed to `filter_hp50()` for each output sample
 * x               Input samples as packed pairs; the `120 / p` pairs located
 *                 before `x` are read, the caller must provide them
 * y, n            Output buffer, and number of samples to produce
 *
 * Each output accumulates `1 + 120 / p` dual 16-bit products with `__smlad`,
 * goes through `filter_hp50()`, and is rounded down to 16 bits.
 */
#if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{
    const int w = 120 / p;

    /* Step back over the input history, counted in packed pairs */
    x -= w;

    for (int i = 0; i < 15*n; i += 15) {
        /* Row of coefficients, and first input pair, of this output */
        const int16x2_t *hn = h + (i % (2*p)) * (128 / p);
        const int16x2_t *xn = x + (i / (2*p));

        int32_t un = __smlad(*(xn++), *(hn++), 0);

        for (int k = 0; k < w; k += 15) {
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
            un = __smlad(*(xn++), *(hn++), un);
        }

        /* High-pass filtering, then rounding of the 16 low bits */
        int32_t yn = filter_hp50(hp50, un);
        *(y++) = (yn + (1 << 15)) >> 16;
    }
}
#endif

/**
 * Resample from 8 Khz to 12.8 KHz
 * hp50            State handed to `filter_hp50()`
 * x               Input samples, read as packed pairs of 16-bit values
 *                 (assumes 32-bit alignment -- to be confirmed on callers)
 * y, n            Output buffer, and number of samples to produce
 *
 * The table holds 16 rows of 12 coefficients.
 */
#ifndef resample_8k_12k8
#define resample_8k_12k8 arm_resample_8k_12k8
static void arm_resample_8k_12k8(
    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{
    static const int16_t alignas(int32_t) h[2*8*12] = {
        0,  214,  417, -1052, -4529, 26233, -4529, -1052,  417,  214,  0,  0,
        0,  180,  0, -1522, -2427, 24506, -5289,  0,  763,  156, -28,  0,
        0,  92, -323, -1361,  0, 19741, -3885,  1317,  861,  0, -61,  0,
        0,  0, -457, -752,  1873, 13068,  0,  2389,  598, -213, -79,  0,
        0, -61, -398,  0,  2686,  5997,  5997,  2686,  0, -398, -61,  0,
        0, -79, -213,  598,  2389,  0, 13068,  1873, -752, -457,  0,  0,
        0, -61,  0,  861,  1317, -3885, 19741,  0, -1361, -323,  92,  0,
        0, -28,  156,  763,  0, -5289, 24506, -2427, -1522,  0,  180,  0,
        0,  0,  214,  417, -1052, -4529, 26233, -4529, -1052,  417,  214,  0,
        0,  0,  180,  0, -1522, -2427, 24506, -5289,  0,  763,  156, -28,
        0,  0,  92, -323, -1361,  0, 19741, -3885,  1317,  861,  0, -61,
        0,  0,  0, -457, -752,  1873, 13068,  0,  2389,  598, -213, -79,
        0,  0, -61, -398,  0,  2686,  5997,  5997,  2686,  0, -398, -61,
        0,  0, -79, -213,  598,  2389,  0, 13068,  1873, -752, -457,  0,
        0,  0, -61,  0,  861,  1317, -3885, 19741,  0, -1361, -323,  92,
        0,  0, -28,  156,  763,  0, -5289, 24506, -2427, -1522,  0,  180,
    };

    /* Keep the `const` qualification of the input across the cast */
    arm_resample_x64k_12k8(
        8, (const int16x2_t *)h, hp50, (const int16x2_t *)x, y, n);
}
#endif /* resample_8k_12k8 */

/**
 * Resample from 16 Khz to 12.8 KHz
 * hp50            State handed to `filter_hp50()`
 * x               Input samples, read as packed pairs of 16-bit values
 *                 (assumes 32-bit alignment -- to be confirmed on callers)
 * y, n            Output buffer, and number of samples to produce
 *
 * The table holds 8 rows of 24 coefficients.
 */
#ifndef resample_16k_12k8
#define resample_16k_12k8 arm_resample_16k_12k8
static void arm_resample_16k_12k8(
    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{
    static const int16_t alignas(int32_t) h[2*4*24] = {

        0, -61,  214, -398,  417,  0, -1052,  2686,
        -4529,  5997, 26233,  5997, -4529,  2686, -1052,  0,
        417, -398,  214, -61,  0,  0,  0,  0,


        0, -79,  180, -213,  0,  598, -1522,  2389,
        -2427,  0, 24506, 13068, -5289,  1873,  0, -752,
        763, -457,  156,  0, -28,  0,  0,  0,


        0, -61,  92,  0, -323,  861, -1361,  1317,
        0, -3885, 19741, 19741, -3885,  0,  1317, -1361,
        861, -323,  0,  92, -61,  0,  0,  0,

        0, -28,  0,  156, -457,  763, -752,  0,
        1873, -5289, 13068, 24506,  0, -2427,  2389, -1522,
        598,  0, -213,  180, -79,  0,  0,  0,


        0,  0, -61,  214, -398,  417,  0, -1052,
        2686, -4529,  5997, 26233,  5997, -4529,  2686, -1052,
        0,  417, -398,  214, -61,  0,  0,  0,


        0,  0, -79,  180, -213,  0,  598, -1522,
        2389, -2427,  0, 24506, 13068, -5289,  1873,  0,
        -752,  763, -457,  156,  0, -28,  0,  0,


        0,  0, -61,  92,  0, -323,  861, -1361,
        1317,  0, -3885, 19741, 19741, -3885,  0,  1317,
        -1361,  861, -323,  0,  92, -61,  0,  0,

        0,  0, -28,  0,  156, -457,  763, -752,
        0,  1873, -5289, 13068, 24506,  0, -2427,  2389,
        -1522,  598,  0, -213,  180, -79,  0,  0,
    };

    /* Keep the `const` qualification of the input across the cast */
    arm_resample_x64k_12k8(
        4, (const int16x2_t *)h, hp50, (const int16x2_t *)x, y, n);
}
#endif /* resample_16k_12k8 */

/**
 * Resample from 32 Khz to 12.8 KHz
 * hp50            State handed to `filter_hp50()`
 * x               Input samples, read as packed pairs of 16-bit values
 *                 (assumes 32-bit alignment -- to be confirmed on callers)
 * y, n            Output buffer, and number of samples to produce
 *
 * The table holds 4 rows of 48 coefficients.
 */
#ifndef resample_32k_12k8
#define resample_32k_12k8 arm_resample_32k_12k8
static void arm_resample_32k_12k8(
    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{
    static const int16_t alignas(int32_t) h[2*2*48] = {

        0, -30, -31,  46,  107,  0, -199, -162,
        209,  430,  0, -681, -526,  658,  1343,  0,
        -2264, -1943,  2999,  9871, 13116,  9871,  2999, -1943,
        -2264,  0,  1343,  658, -526, -681,  0,  430,
        209, -162, -199,  0,  107,  46, -31, -30,
        0,  0,  0,  0,  0,  0,  0,  0,

        0, -14, -39,  0,  90,  78, -106, -229,
        0,  382,  299, -376, -761,  0,  1194,  937,
        -1214, -2644,  0,  6534, 12253, 12253,  6534,  0,
        -2644, -1214,  937,  1194,  0, -761, -376,  299,
        382,  0, -229, -106,  78,  90,  0, -39,
        -14,  0,  0,  0,  0,  0,  0,  0,

        0,  0, -30, -31,  46,  107,  0, -199,
        -162,  209,  430,  0, -681, -526,  658,  1343,
        0, -2264, -1943,  2999,  9871, 13116,  9871,  2999,
        -1943, -2264,  0,  1343,  658, -526, -681,  0,
        430,  209, -162, -199,  0,  107,  46, -31,
        -30,  0,  0,  0,  0,  0,  0,  0,

        0,  0, -14, -39,  0,  90,  78, -106,
        -229,  0,  382,  299, -376, -761,  0,  1194,
        937, -1214, -2644,  0,  6534, 12253, 12253,  6534,
        0, -2644, -1214,  937,  1194,  0, -761, -376,
        299,  382,  0, -229, -106,  78,  90,  0,
        -39, -14,  0,  0,  0,  0,  0,  0,
    };

    /* Keep the `const` qualification of the input across the cast */
    arm_resample_x64k_12k8(
        2, (const int16x2_t *)h, hp50, (const int16x2_t *)x, y, n);
}
#endif /* resample_32k_12k8 */

/**
 * Resample from 24 Khz to 12.8 KHz
 * hp50            State handed to `filter_hp50()`
 * x               Input samples, read as packed pairs of 16-bit values
 *                 (assumes 32-bit alignment -- to be confirmed on callers)
 * y, n            Output buffer, and number of samples to produce
 *
 * The table holds 16 rows of 32 coefficients.
 */
#ifndef resample_24k_12k8
#define resample_24k_12k8 arm_resample_24k_12k8
static void arm_resample_24k_12k8(
    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{
    static const int16_t alignas(int32_t) h[2*8*32] = {

        0, -50,  19,  143, -93, -290,  278,  485,
        -658, -701,  1396,  901, -3019, -1042, 10276, 17488,
        10276, -1042, -3019,  901,  1396, -701, -658,  485,
        278, -290, -93,  143,  19, -50,  0,  0,

        0, -46,  0,  141, -45, -305,  185,  543,
        -501, -854,  1153,  1249, -2619, -1908,  8712, 17358,
        11772,  0, -3319,  480,  1593, -504, -796,  399,
        367, -261, -142,  138,  40, -52, -5,  0,

        0, -41, -17,  133,  0, -304,  91,  574,
        -334, -959,  878,  1516, -2143, -2590,  7118, 16971,
        13161,  1202, -3495,  0,  1731, -267, -908,  287,
        445, -215, -188,  125,  62, -52, -12,  0,

        0, -34, -30,  120,  41, -291,  0,  577,
        -164, -1015,  585,  1697, -1618, -3084,  5534, 16337,
        14406,  2544, -3526, -523,  1800,  0, -985,  152,
        509, -156, -230,  104,  83, -48, -19,  0,

        0, -26, -41,  103,  76, -265, -83,  554,
        0, -1023,  288,  1791, -1070, -3393,  3998, 15474,
        15474,  3998, -3393, -1070,  1791,  288, -1023,  0,
        554, -83, -265,  76,  103, -41, -26,  0,

        0, -19, -48,  83,  104, -230, -156,  509,
        152, -985,  0,  1800, -523, -3526,  2544, 14406,
        16337,  5534, -3084, -1618,  1697,  585, -1015, -164,
        577,  0, -291,  41,  120, -30, -34,  0,

        0, -12, -52,  62,  125, -188, -215,  445,
        287, -908, -267,  1731,  0, -3495,  1202, 13161,
        16971,  7118, -2590, -2143,  1516,  878, -959, -334,
        574,  91, -304,  0,  133, -17, -41,  0,

        0, -5, -52,  40,  138, -142, -261,  367,
        399, -796, -504,  1593,  480, -3319,  0, 11772,
        17358,  8712, -1908, -2619,  1249,  1153, -854, -501,
        543,  185, -305, -45,  141,  0, -46,  0,

        0,  0, -50,  19,  143, -93, -290,  278,
        485, -658, -701,  1396,  901, -3019, -1042, 10276,
        17488, 10276, -1042, -3019,  901,  1396, -701, -658,
        485,  278, -290, -93,  143,  19, -50,  0,

        0,  0, -46,  0,  141, -45, -305,  185,
        543, -501, -854,  1153,  1249, -2619, -1908,  8712,
        17358, 11772,  0, -3319,  480,  1593, -504, -796,
        399,  367, -261, -142,  138,  40, -52, -5,

        0,  0, -41, -17,  133,  0, -304,  91,
        574, -334, -959,  878,  1516, -2143, -2590,  7118,
        16971, 13161,  1202, -3495,  0,  1731, -267, -908,
        287,  445, -215, -188,  125,  62, -52, -12,

        0,  0, -34, -30,  120,  41, -291,  0,
        577, -164, -1015,  585,  1697, -1618, -3084,  5534,
        16337, 14406,  2544, -3526, -523,  1800,  0, -985,
        152,  509, -156, -230,  104,  83, -48, -19,

        0,  0, -26, -41,  103,  76, -265, -83,
        554,  0, -1023,  288,  1791, -1070, -3393,  3998,
        15474, 15474,  3998, -3393, -1070,  1791,  288, -1023,
        0,  554, -83, -265,  76,  103, -41, -26,

        0,  0, -19, -48,  83,  104, -230, -156,
        509,  152, -985,  0,  1800, -523, -3526,  2544,
        14406, 16337,  5534, -3084, -1618,  1697,  585, -1015,
        -164,  577,  0, -291,  41,  120, -30, -34,

        0,  0, -12, -52,  62,  125, -188, -215,
        445,  287, -908, -267,  1731,  0, -3495,  1202,
        13161, 16971,  7118, -2590, -2143,  1516,  878, -959,
        -334,  574,  91, -304,  0,  133, -17, -41,

        0,  0, -5, -52,  40,  138, -142, -261,
        367,  399, -796, -504,  1593,  480, -3319,  0,
        11772, 17358,  8712, -1908, -2619,  1249,  1153, -854,
        -501,  543,  185, -305, -45,  141,  0, -46,
    };

    /* Keep the `const` qualification of the input across the cast */
    arm_resample_x192k_12k8(
        8, (const int16x2_t *)h, hp50, (const int16x2_t *)x, y, n);
}
#endif /* resample_24k_12k8 */

/**
 * Resample from 48 Khz to 12.8 KHz
 * hp50            State handed to `filter_hp50()`
 * x               Input samples, read as packed pairs of 16-bit values
 *                 (assumes 32-bit alignment -- to be confirmed on callers)
 * y, n            Output buffer, and number of samples to produce
 *
 * The table holds 8 rows of 64 coefficients.
 */
#ifndef resample_48k_12k8
#define resample_48k_12k8 arm_resample_48k_12k8
static void arm_resample_48k_12k8(
    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{
    static const int16_t alignas(int32_t) h[2*4*64] = {

        0, -13, -25, -20,  10,  51,  71,  38,
        -47, -133, -145, -42,  139,  277,  242,  0,
        -329, -511, -351,  144,  698,  895,  450, -535,
        -1510, -1697, -521,  1999,  5138,  7737,  8744,  7737,
        5138,  1999, -521, -1697, -1510, -535,  450,  895,
        698,  144, -351, -511, -329,  0,  242,  277,
        139, -42, -145, -133, -47,  38,  71,  51,
        10, -20, -25, -13,  0,  0,  0,  0,

        0, -9, -23, -24,  0,  41,  71,  52,
        -23, -115, -152, -78,  92,  254,  272,  76,
        -251, -493, -427,  0,  576,  900,  624, -262,
        -1309, -1763, -954,  1272,  4356,  7203,  8679,  8169,
        5886,  2767,  0, -1542, -1660, -809,  240,  848,
        796,  292, -252, -507, -398, -82,  199,  288,
        183,  0, -130, -145, -71,  20,  69,  60,
        20, -15, -26, -17, -3,  0,  0,  0,

        0, -6, -20, -26, -8,  31,  67,  62,
        0, -94, -152, -108,  45,  223,  287,  143,
        -167, -454, -480, -134,  439,  866,  758,  0,
        -1071, -1748, -1295,  601,  3559,  6580,  8485,  8485,
        6580,  3559,  601, -1295, -1748, -1071,  0,  758,
        866,  439, -134, -480, -454, -167,  143,  287,
        223,  45, -108, -152, -94,  0,  62,  67,
        31, -8, -26, -20, -6,  0,  0,  0,

        0, -3, -17, -26, -15,  20,  60,  69,
        20, -71, -145, -130,  0,  183,  288,  199,
        -82, -398, -507, -252,  292,  796,  848,  240,
        -809, -1660, -1542,  0,  2767,  5886,  8169,  8679,
        7203,  4356,  1272, -954, -1763, -1309, -262,  624,
        900,  576,  0, -427, -493, -251,  76,  272,
        254,  92, -78, -152, -115, -23,  52,  71,
        41,  0, -24, -23, -9,  0,  0,  0,

        0,  0, -13, -25, -20,  10,  51,  71,
        38, -47, -133, -145, -42,  139,  277,  242,
        0, -329, -511, -351,  144,  698,  895,  450,
        -535, -1510, -1697, -521,  1999,  5138,  7737,  8744,
        7737,  5138,  1999, -521, -1697, -1510, -535,  450,
        895,  698,  144, -351, -511, -329,  0,  242,
        277,  139, -42, -145, -133, -47,  38,  71,
        51,  10, -20, -25, -13,  0,  0,  0,

        0,  0, -9, -23, -24,  0,  41,  71,
        52, -23, -115, -152, -78,  92,  254,  272,
        76, -251, -493, -427,  0,  576,  900,  624,
        -262, -1309, -1763, -954,  1272,  4356,  7203,  8679,
        8169,  5886,  2767,  0, -1542, -1660, -809,  240,
        848,  796,  292, -252, -507, -398, -82,  199,
        288,  183,  0, -130, -145, -71,  20,  69,
        60,  20, -15, -26, -17, -3,  0,  0,

        0,  0, -6, -20, -26, -8,  31,  67,
        62,  0, -94, -152, -108,  45,  223,  287,
        143, -167, -454, -480, -134,  439,  866,  758,
        0, -1071, -1748, -1295,  601,  3559,  6580,  8485,
        8485,  6580,  3559,  601, -1295, -1748, -1071,  0,
        758,  866,  439, -134, -480, -454, -167,  143,
        287,  223,  45, -108, -152, -94,  0,  62,
        67,  31, -8, -26, -20, -6,  0,  0,

        0,  0, -3, -17, -26, -15,  20,  60,
        69,  20, -71, -145, -130,  0,  183,  288,
        199, -82, -398, -507, -252,  292,  796,  848,
        240, -809, -1660, -1542,  0,  2767,  5886,  8169,
        8679,  7203,  4356,  1272, -954, -1763, -1309, -262,
        624,  900,  576,  0, -427, -493, -251,  76,
        272,  254,  92, -78, -152, -115, -23,  52,
        71,  41,  0, -24, -23, -9,  0,  0,
    };

    /* Keep the `const` qualification of the input across the cast */
    arm_resample_x192k_12k8(
        4, (const int16x2_t *)h, hp50, (const int16x2_t *)x, y, n);
}
#endif /* resample_48k_12k8 */

/**
 * Return vector of correlations
 * a, b, n         Input vectors, and number of samples of each correlation
 * y, nc           Output vector, and number of correlations
 *
 * The correlation `y[k]` is computed between `a` and the samples starting
 * at `b - k`; hence `b` is read backward, down to `b - (nc - 1)`.
 *
 * `a` is read as packed pairs of 16-bit values (assumes 32-bit alignment
 * -- to be confirmed on callers). The pair processing consumes `n` by
 * blocks of 16 samples, and scales down the result by 6 bits with rounding;
 * the other outputs are delegated to `dot()`, presumably with the same
 * scaling -- to be confirmed on its definition.
 */
#ifndef correlate
#define correlate arm_correlate
static void arm_correlate(
    const int16_t *a, const int16_t *b, int n, float *y, int nc)
{
    /* --- Check alignment of `b` ---
     * Process a first output apart, so that `b` gets aligned on 32 bits.
     * Nothing is written when no output is requested. */

    if (nc > 0 && ((uintptr_t)b & 3)) {
        *(y++) = dot(a, b--, n);
        nc--;
    }

    /* --- Processing by pair --- */

    for ( ; nc >= 2; nc -= 2) {
        const int16x2_t *an = (const int16x2_t *)(a  );
        const int16x2_t *bn = (const int16x2_t *)(b--);

        int16x2_t ax, b0, b1;
        int64_t v0 = 0, v1 = 0;

        /* Load the sample preceding the pairs of `b` into the top halfword.
         * The shift is done unsigned, left shifting a negative value is
         * undefined behavior */

        b1 = (int16x2_t)((uint32_t)*(b--) << 16);

        for (int i = 0; i < (n >> 4); i++ )
            for (int j = 0; j < 4; j++) {

                /* `v0` accumulates against the current pair of `b`, `v1`
                 * against the pair shifted back by one sample, rebuilt
                 * from the current and the previous pairs */

                ax = *(an++), b0 = *(bn++);
                v0 = __smlald (ax, b0, v0);
                v1 = __smlaldx(ax, __pkhbt(b0, b1), v1);

                ax = *(an++), b1 = *(bn++);
                v0 = __smlald (ax, b1, v0);
                v1 = __smlaldx(ax, __pkhbt(b1, b0), v1);
            }

        *(y++) = (float)((int32_t)((v0 + (1 << 5)) >> 6));
        *(y++) = (float)((int32_t)((v1 + (1 << 5)) >> 6));
    }

    /* --- Odd element count --- */

    if (nc > 0)
        *(y++) = dot(a, b, n);
}
#endif /* correlate */

#endif /* __ARM_FEATURE_SIMD32 */