1 /**
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "Quantiser.h"
18
BsearchLL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)19 XBT_INLINE_ int32_t BsearchLL(const int32_t absDiffSignalShifted, const int32_t delta,
20 const int32_t* dqbitTablePrt) {
21 int32_t qCode = 0;
22 reg64_t tmp_acc;
23 int32_t tmp = 0;
24 int32_t lc_delta = delta << 8;
25
26 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[128];
27 tmp_acc.s32.h -= absDiffSignalShifted;
28 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
29 if (tmp <= 0) {
30 qCode = 128;
31 }
32
33 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 64];
34 tmp_acc.s32.h -= absDiffSignalShifted;
35 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
36 if (tmp <= 0) {
37 qCode += 64;
38 }
39
40 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 32];
41 tmp_acc.s32.h -= absDiffSignalShifted;
42 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
43 if (tmp <= 0) {
44 qCode += 32;
45 }
46
47 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 16];
48 tmp_acc.s32.h -= absDiffSignalShifted;
49 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
50 if (tmp <= 0) {
51 qCode += 16;
52 }
53 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
54 tmp_acc.s32.h -= absDiffSignalShifted;
55 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
56 if (tmp <= 0) {
57 qCode += 8;
58 }
59
60 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
61 tmp_acc.s32.h -= absDiffSignalShifted;
62 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
63 if (tmp <= 0) {
64 qCode += 4;
65 }
66
67 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
68 tmp_acc.s32.h -= absDiffSignalShifted;
69 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
70 if (tmp <= 0) {
71 qCode += 2;
72 }
73
74 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
75 tmp_acc.s32.h -= absDiffSignalShifted;
76 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
77 if (tmp <= 0) {
78 qCode++;
79 }
80
81 return qCode;
82 }
83
BsearchHL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)84 XBT_INLINE_ int32_t BsearchHL(const int32_t absDiffSignalShifted, const int32_t delta,
85 const int32_t* dqbitTablePrt) {
86 int32_t qCode = 0;
87 reg64_t tmp_acc;
88 int32_t tmp = 0;
89 int32_t lc_delta = delta << 8;
90
91 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[4];
92 tmp_acc.s32.h -= absDiffSignalShifted;
93 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
94 if (tmp <= 0) {
95 qCode = 4;
96 }
97
98 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
99 tmp_acc.s32.h -= absDiffSignalShifted;
100 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
101 if (tmp <= 0) {
102 qCode += 2;
103 }
104
105 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
106 tmp_acc.s32.h -= absDiffSignalShifted;
107 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
108 if (tmp <= 0) {
109 qCode++;
110 }
111
112 return qCode;
113 }
114
BsearchHH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)115 XBT_INLINE_ int32_t BsearchHH(const int32_t absDiffSignalShifted, const int32_t delta,
116 const int32_t* dqbitTablePrt) {
117 int32_t qCode = 0;
118 reg64_t tmp_acc;
119 int32_t tmp = 0;
120 int32_t lc_delta = delta << 8;
121
122 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[8];
123 tmp_acc.s32.h -= absDiffSignalShifted;
124 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
125 if (tmp <= 0) {
126 qCode = 8;
127 }
128
129 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
130 tmp_acc.s32.h -= absDiffSignalShifted;
131 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
132 if (tmp <= 0) {
133 qCode += 4;
134 }
135
136 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
137 tmp_acc.s32.h -= absDiffSignalShifted;
138 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
139 if (tmp <= 0) {
140 qCode += 2;
141 }
142
143 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
144 tmp_acc.s32.h -= absDiffSignalShifted;
145 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
146 if (tmp <= 0) {
147 qCode++;
148 }
149
150 return qCode;
151 }
152
quantiseDifference_HDHL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)153 void quantiseDifference_HDHL(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
154 Quantiser_data* qdata_pt) {
155 int32_t absDiffSignal = 0;
156 int32_t absDiffSignalShifted = 0;
157 int32_t index = 0;
158 int32_t dithSquared = 0;
159 int32_t minusLambdaD = 0;
160 int32_t acc = 0;
161 int32_t threshDiff = 0;
162 reg64_t tmp_acc;
163 reg64_t tmp_reg64;
164 int32_t tmp_accL = 0;
165 int32_t tmp_qCode = 0;
166 int32_t tmp_altQcode = 0;
167 uint32_t tmp_round0 = 0;
168 int32_t _delta = 0;
169
170 /* Form the absolute value of the difference signal and maintain a version
171 * that is right-shifted 4 places for delta scaling. */
172 absDiffSignal = -diffSignal;
173 if (diffSignal >= 0) {
174 absDiffSignal = diffSignal;
175 }
176 absDiffSignal = ssat24(absDiffSignal);
177 absDiffSignalShifted = absDiffSignal >> deltaScale;
178
179 /* Binary search for the quantised code. This search terminates with the
180 * table index of the LARGEST threshold table value for which
181 * absDiffSignalShifted >= (delta * threshold)
182 */
183 index = BsearchHL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
184
185 /* We actually wanted the SMALLEST magnitude quantised code for which
186 * absDiffSignalShifted < (delta * threshold)
187 * i.e. the code with the next highest magnitude than the one we actually
188 * found. We could add +1 to the code magnitude to do this, but we need to
189 * subtract 1 from the code magnitude to compensate for the "phantom
190 * element" at the base of the quantisation table. These two effects cancel
191 * out, so we leave the value of code alone. However, we need to form code+1
192 * to get the proper index into the both the threshold and dither tables,
193 * since we must skip over the phantom element at the base. */
194 qdata_pt->qCode = index;
195
196 /* Square the dither and get the value back from the ALU
197 * (saturated/rounded). */
198 tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
199
200 acc = tmp_acc.s32.h;
201 tmp_round0 = (uint32_t)acc << 8;
202
203 acc = (acc >> 6) + 1;
204 acc >>= 1;
205 if (tmp_round0 == 0x40000000L) {
206 acc--;
207 }
208 acc = ssat24(acc);
209
210 dithSquared = acc;
211
212 /* Form the negative difference of the dither values at index and index-1.
213 * Load the accumulator with this value divided by 2. Ensure saturation is
214 * applied to the difference calculation. */
215 minusLambdaD = qdata_pt->minusLambdaDTable[index];
216
217 tmp_accL = (1 << 23) - dithSquared;
218 tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
219
220 tmp_round0 = tmp_acc.s32.l << 8;
221
222 acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
223 acc++;
224 acc >>= 1;
225 if (tmp_round0 == 0x40000000L) {
226 acc--;
227 }
228
229 /* Add the threshold table values at index and index-1 to the accumulated
230 * value. */
231 acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
232 //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
233 acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
234 //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
235
236 // saturation required
237 acc = ssat24(acc);
238
239 /* Form the threshold table difference at index and index-1. Ensure
240 * saturation is applied to the difference calculation. */
241 threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
242
243 /* Based on the sign of the difference signal, either add or subtract the
244 * threshold table difference from the accumulated value. Recover the final
245 * accumulated value (saturated/rounded) */
246 if (diffSignal < 0) {
247 threshDiff = -threshDiff;
248 }
249 tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
250
251 tmp_reg64.s32.h += acc;
252 acc = tmp_reg64.s32.h;
253
254 if (tmp_reg64.u32.l >= 0x80000000) {
255 acc++;
256 }
257 tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
258
259 acc = ssat24(acc);
260
261 if (tmp_round0 == 0x40000000L) {
262 acc--;
263 }
264 _delta = -delta << 8;
265
266 acc = (int32_t)((uint32_t)acc << 4);
267
268 /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
269 * signal used to determine if dithering alters the quantised code value or
270 * not. */
271 // worst case value for delta is 0x7d400
272 tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
273 tmp_reg64.s32.h += absDiffSignal;
274 tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
275 acc = tmp_reg64.s32.h + (1 << 2);
276 acc >>= 3;
277 if (tmp_round0 == 0x40000000L) {
278 acc--;
279 }
280
281 tmp_qCode = qdata_pt->qCode;
282 tmp_altQcode = tmp_qCode - 1;
283 /* Check the sign of the distance penalty. Get the sign from the
284 * full-precision accumulator, as done in the Kalimba code. */
285 if (tmp_reg64.s32.h < 0) {
286 /* The distance is -ve. The optimum code needs decremented by 1 and the
287 * alternative code is 1 greater than this. Get the rounded version of the
288 * -ve distance penalty and negate this (form distance magnitude) before
289 * writing the value out */
290 tmp_qCode = tmp_altQcode;
291 tmp_altQcode++;
292 acc = -acc;
293 }
294
295 qdata_pt->distPenalty = acc;
296 /* If the difference signal is negative, bitwise invert the code (restores
297 * sign to the magnitude). */
298 if (diffSignal < 0) {
299 tmp_qCode = ~tmp_qCode;
300 tmp_altQcode = ~tmp_altQcode;
301 }
302 qdata_pt->altQcode = tmp_altQcode;
303 qdata_pt->qCode = tmp_qCode;
304 }
305
quantiseDifference_HDHH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)306 void quantiseDifference_HDHH(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
307 Quantiser_data* qdata_pt) {
308 int32_t absDiffSignal;
309 int32_t absDiffSignalShifted;
310 int32_t index;
311 int32_t dithSquared;
312 int32_t minusLambdaD;
313 int32_t acc;
314 int32_t threshDiff;
315 reg64_t tmp_acc;
316 reg64_t tmp_reg64;
317 int32_t tmp_accL;
318 int32_t tmp_qCode;
319 int32_t tmp_altQcode;
320 uint32_t tmp_round0;
321 int32_t _delta;
322
323 /* Form the absolute value of the difference signal and maintain a version
324 * that is right-shifted 4 places for delta scaling. */
325 absDiffSignal = -diffSignal;
326 if (diffSignal >= 0) {
327 absDiffSignal = diffSignal;
328 }
329 absDiffSignal = ssat24(absDiffSignal);
330 absDiffSignalShifted = absDiffSignal >> deltaScale;
331
332 /* Binary search for the quantised code. This search terminates with the
333 * table index of the LARGEST threshold table value for which
334 * absDiffSignalShifted >= (delta * threshold)
335 */
336 index = BsearchHH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
337
338 /* We actually wanted the SMALLEST magnitude quantised code for which
339 * absDiffSignalShifted < (delta * threshold)
340 * i.e. the code with the next highest magnitude than the one we actually
341 * found. We could add +1 to the code magnitude to do this, but we need to
342 * subtract 1 from the code magnitude to compensate for the "phantom
343 * element" at the base of the quantisation table. These two effects cancel
344 * out, so we leave the value of code alone. However, we need to form code+1
345 * to get the proper index into the both the threshold and dither tables,
346 * since we must skip over the phantom element at the base. */
347 qdata_pt->qCode = index;
348
349 /* Square the dither and get the value back from the ALU
350 * (saturated/rounded). */
351 tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
352
353 acc = tmp_acc.s32.h;
354 tmp_round0 = (uint32_t)acc << 8;
355
356 acc = (acc >> 6) + 1;
357 acc >>= 1;
358 if (tmp_round0 == 0x40000000L) {
359 acc--;
360 }
361
362 acc = ssat24(acc);
363
364 dithSquared = acc;
365
366 /* Form the negative difference of the dither values at index and index-1.
367 * Load the accumulator with this value divided by 2. Ensure saturation is
368 * applied to the difference calculation. */
369 minusLambdaD = qdata_pt->minusLambdaDTable[index];
370
371 tmp_accL = (1 << 23) - dithSquared;
372 tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
373
374 tmp_round0 = tmp_acc.s32.l << 8;
375
376 acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
377 acc++;
378 acc >>= 1;
379 if (tmp_round0 == 0x40000000L) {
380 acc--;
381 }
382
383 /* Add the threshold table values at index and index-1 to the accumulated
384 * value. */
385 acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
386 //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
387 acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
388 //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
389
390 // saturation required
391 acc = ssat24(acc);
392
393 /* Form the threshold table difference at index and index-1. Ensure
394 * saturation is applied to the difference calculation. */
395 threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
396
397 /* Based on the sign of the difference signal, either add or subtract the
398 * threshold table difference from the accumulated value. Recover the final
399 * accumulated value (saturated/rounded) */
400 if (diffSignal < 0) {
401 threshDiff = -threshDiff;
402 }
403 tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
404 tmp_reg64.s32.h += acc;
405 acc = tmp_reg64.s32.h;
406
407 if (tmp_reg64.u32.l >= 0x80000000) {
408 acc++;
409 }
410 tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
411
412 acc = ssat24(acc);
413
414 if (tmp_round0 == 0x40000000L) {
415 acc--;
416 }
417 _delta = -delta << 8;
418
419 acc = (int32_t)((uint32_t)acc << 4);
420
421 /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
422 * signal used to determine if dithering alters the quantised code value or
423 * not. */
424 // worst case value for delta is 0x7d400
425 tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
426 tmp_reg64.s32.h += absDiffSignal;
427 tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
428 acc = tmp_reg64.s32.h + (1 << 2);
429 acc >>= 3;
430 if (tmp_round0 == 0x40000000L) {
431 acc--;
432 }
433
434 tmp_qCode = qdata_pt->qCode;
435 tmp_altQcode = tmp_qCode - 1;
436 /* Check the sign of the distance penalty. Get the sign from the
437 * full-precision accumulator, as done in the Kalimba code. */
438 if (tmp_reg64.s32.h < 0) {
439 /* The distance is -ve. The optimum code needs decremented by 1 and the
440 * alternative code is 1 greater than this. Get the rounded version of the
441 * -ve distance penalty and negate this (form distance magnitude) before
442 * writing the value out */
443 tmp_qCode = tmp_altQcode;
444 tmp_altQcode++;
445 acc = -acc;
446 }
447
448 qdata_pt->distPenalty = acc;
449 /* If the difference signal is negative, bitwise invert the code (restores
450 * sign to the magnitude). */
451 if (diffSignal < 0) {
452 tmp_qCode = ~tmp_qCode;
453 tmp_altQcode = ~tmp_altQcode;
454 }
455 qdata_pt->altQcode = tmp_altQcode;
456 qdata_pt->qCode = tmp_qCode;
457 }
458
quantiseDifference_HDLL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)459 void quantiseDifference_HDLL(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
460 Quantiser_data* qdata_pt) {
461 int32_t absDiffSignal;
462 int32_t absDiffSignalShifted;
463 int32_t index;
464 int32_t dithSquared;
465 int32_t minusLambdaD;
466 int32_t acc;
467 int32_t threshDiff;
468 reg64_t tmp_acc;
469 reg64_t tmp_reg64;
470 int32_t tmp_accL;
471 int32_t tmp_qCode;
472 int32_t tmp_altQcode;
473 uint32_t tmp_round0;
474 int32_t _delta;
475
476 /* Form the absolute value of the difference signal and maintain a version
477 * that is right-shifted 4 places for delta scaling. */
478 absDiffSignal = -diffSignal;
479 if (diffSignal >= 0) {
480 absDiffSignal = diffSignal;
481 }
482 absDiffSignal = ssat24(absDiffSignal);
483 absDiffSignalShifted = absDiffSignal >> deltaScale;
484
485 /* Binary search for the quantised code. This search terminates with the
486 * table index of the LARGEST threshold table value for which
487 * absDiffSignalShifted >= (delta * threshold)
488 */
489 index = BsearchLL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
490
491 /* We actually wanted the SMALLEST magnitude quantised code for which
492 * absDiffSignalShifted < (delta * threshold)
493 * i.e. the code with the next highest magnitude than the one we actually
494 * found. We could add +1 to the code magnitude to do this, but we need to
495 * subtract 1 from the code magnitude to compensate for the "phantom
496 * element" at the base of the quantisation table. These two effects cancel
497 * out, so we leave the value of code alone. However, we need to form code+1
498 * to get the proper index into the both the threshold and dither tables,
499 * since we must skip over the phantom element at the base. */
500 qdata_pt->qCode = index;
501
502 /* Square the dither and get the value back from the ALU
503 * (saturated/rounded). */
504
505 tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
506
507 acc = tmp_acc.s32.h;
508 tmp_round0 = (uint32_t)acc << 8;
509
510 acc = (acc >> 6) + 1;
511 acc >>= 1;
512 if (tmp_round0 == 0x40000000L) {
513 acc--;
514 }
515
516 acc = ssat24(acc);
517
518 dithSquared = acc;
519
520 /* Form the negative difference of the dither values at index and index-1.
521 * Load the accumulator with this value divided by 2. Ensure saturation is
522 * applied to the difference calculation. */
523 minusLambdaD = qdata_pt->minusLambdaDTable[index];
524
525 tmp_accL = (1 << 23) - dithSquared;
526 tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
527
528 tmp_round0 = tmp_acc.s32.l << 8;
529
530 acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
531 acc++;
532 acc >>= 1;
533 if (tmp_round0 == 0x40000000L) {
534 acc--;
535 }
536
537 /* Add the threshold table values at index and index-1 to the accumulated
538 * value. */
539
540 acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
541 //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
542 acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
543 //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
544 // saturation required
545 acc = ssat24(acc);
546
547 /* Form the threshold table difference at index and index-1. Ensure
548 * saturation is applied to the difference calculation. */
549 threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
550
551 /* Based on the sign of the difference signal, either add or subtract the
552 * threshold table difference from the accumulated value. Recover the final
553 * accumulated value (saturated/rounded) */
554
555 if (diffSignal < 0) {
556 threshDiff = -threshDiff;
557 }
558 tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
559 tmp_reg64.s32.h += acc;
560 acc = tmp_reg64.s32.h;
561
562 if (tmp_reg64.u32.l >= 0x80000000) {
563 acc++;
564 }
565 tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
566
567 acc = ssat24(acc);
568
569 if (tmp_round0 == 0x40000000L) {
570 acc--;
571 }
572 _delta = -delta << 8;
573
574 acc = (int32_t)((uint32_t)acc << 4);
575
576 /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
577 * signal used to determine if dithering alters the quantised code value or
578 * not. */
579 // worst case value for delta is 0x7d400
580
581 tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
582 tmp_reg64.s32.h += absDiffSignal;
583 tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
584 acc = tmp_reg64.s32.h + (1 << 2);
585 acc >>= 3;
586 if (tmp_round0 == 0x40000000L) {
587 acc--;
588 }
589
590 tmp_qCode = qdata_pt->qCode;
591 tmp_altQcode = tmp_qCode - 1;
592 /* Check the sign of the distance penalty. Get the sign from the
593 * full-precision accumulator, as done in the Kalimba code. */
594
595 if (tmp_reg64.s32.h < 0) {
596 /* The distance is -ve. The optimum code needs decremented by 1 and the
597 * alternative code is 1 greater than this. Get the rounded version of the
598 * -ve distance penalty and negate this (form distance magnitude) before
599 * writing the value out */
600 tmp_qCode = tmp_altQcode;
601 tmp_altQcode++;
602 acc = -acc;
603 }
604
605 qdata_pt->distPenalty = acc;
606 /* If the difference signal is negative, bitwise invert the code (restores
607 * sign to the magnitude). */
608 if (diffSignal < 0) {
609 tmp_qCode = ~tmp_qCode;
610 tmp_altQcode = ~tmp_altQcode;
611 }
612 qdata_pt->altQcode = tmp_altQcode;
613 qdata_pt->qCode = tmp_qCode;
614 }
615
BsearchLH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)616 static int32_t BsearchLH(const int32_t absDiffSignalShifted, const int32_t delta,
617 const int32_t* dqbitTablePrt) {
618 int32_t qCode;
619 reg64_t tmp_acc;
620 int32_t tmp;
621 int32_t lc_delta = delta << 8;
622
623 qCode = 0;
624
625 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[16];
626 tmp_acc.s32.h -= absDiffSignalShifted;
627 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
628 if (tmp <= 0) {
629 qCode = 16;
630 }
631
632 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
633 tmp_acc.s32.h -= absDiffSignalShifted;
634 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
635 if (tmp <= 0) {
636 qCode += 8;
637 }
638
639 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
640 tmp_acc.s32.h -= absDiffSignalShifted;
641 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
642 if (tmp <= 0) {
643 qCode += 4;
644 }
645
646 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
647 tmp_acc.s32.h -= absDiffSignalShifted;
648 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
649 if (tmp <= 0) {
650 qCode += 2;
651 }
652
653 tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
654 tmp_acc.s32.h -= absDiffSignalShifted;
655 tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
656 if (tmp <= 0) {
657 qCode++;
658 }
659
660 return qCode;
661 }
662
quantiseDifference_HDLH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)663 void quantiseDifference_HDLH(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
664 Quantiser_data* qdata_pt) {
665 int32_t absDiffSignal = 0;
666 int32_t absDiffSignalShifted = 0;
667 int32_t index = 0;
668 int32_t dithSquared = 0;
669 int32_t minusLambdaD = 0;
670 int32_t acc = 0;
671 int32_t threshDiff = 0;
672 reg64_t tmp_acc;
673 reg64_t tmp_reg64;
674 int32_t tmp_accL = 0;
675 int32_t tmp_qCode = 0;
676 int32_t tmp_altQcode = 0;
677
678 uint32_t tmp_round0 = 0;
679 int32_t _delta = 0;
680
681 /* Form the absolute value of the difference signal and maintain a version
682 * that is right-shifted 4 places for delta scaling. */
683 absDiffSignal = -diffSignal;
684 if (diffSignal >= 0) {
685 absDiffSignal = diffSignal;
686 }
687 absDiffSignal = ssat24(absDiffSignal);
688 absDiffSignalShifted = absDiffSignal >> deltaScale;
689
690 /* Binary search for the quantised code. This search terminates with the
691 * table index of the LARGEST threshold table value for which
692 * absDiffSignalShifted >= (delta * threshold)
693 */
694
695 /* first iteration */
696 index = BsearchLH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
697
698 /* We actually wanted the SMALLEST magnitude quantised code for which
699 * absDiffSignalShifted < (delta * threshold)
700 * i.e. the code with the next highest magnitude than the one we actually
701 * found. We could add +1 to the code magnitude to do this, but we need to
702 * subtract 1 from the code magnitude to compensate for the "phantom
703 * element" at the base of the quantisation table. These two effects cancel
704 * out, so we leave the value of code alone. However, we need to form code+1
705 * to get the proper index into the both the threshold and dither tables,
706 * since we must skip over the phantom element at the base. */
707 qdata_pt->qCode = index;
708
709 /* Square the dither and get the value back from the ALU
710 * (saturated/rounded). */
711
712 tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
713
714 acc = tmp_reg64.s32.h;
715
716 tmp_round0 = (uint32_t)acc << 8;
717
718 acc = (acc >> 6) + 1;
719 acc >>= 1;
720 if (tmp_round0 == 0x40000000L) {
721 acc--;
722 }
723 acc = ssat24(acc);
724
725 dithSquared = acc;
726
727 /* Form the negative difference of the dither values at index and index-1.
728 * Load the accumulator with this value divided by 2. Ensure saturation is
729 * applied to the difference calculation. */
730
731 minusLambdaD = qdata_pt->minusLambdaDTable[index];
732
733 tmp_accL = (1 << 23) - dithSquared;
734 tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
735
736 tmp_round0 = tmp_acc.s32.l << 8;
737
738 acc = (int32_t)(tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
739 if (tmp_round0 == 0x40000000L) {
740 acc -= 2;
741 }
742 acc++;
743
744 /* Add the threshold table values at index and index-1 to the accumulated
745 * value. */
746
747 acc += qdata_pt->thresholdTablePtr_sl1[index + 1];
748 //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
749 acc += qdata_pt->thresholdTablePtr_sl1[index];
750 acc >>= 1;
751
752 // saturation required
753 acc = ssat24(acc);
754
755 /* Form the threshold table difference at index and index-1. Ensure
756 * saturation is applied to the difference calculation. */
757 threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
758
759 /* Based on the sign of the difference signal, either add or subtract the
760 * threshold table difference from the accumulated value. Recover the final
761 * accumulated value (saturated/rounded) */
762
763 if (diffSignal < 0) {
764 threshDiff = -threshDiff;
765 }
766 tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
767
768 tmp_reg64.s32.h += acc;
769 acc = tmp_reg64.s32.h;
770
771 if (tmp_reg64.u32.l >= 0x80000000) {
772 acc++;
773 }
774 tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
775
776 acc = ssat24(acc);
777
778 if (tmp_round0 == 0x40000000L) {
779 acc--;
780 }
781 _delta = -delta << 8;
782
783 acc = (int32_t)((uint32_t)acc << 4);
784
785 /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
786 * signal used to determine if dithering alters the quantised code value or
787 * not. */
788 // worst case value for delta is 0x7d400
789
790 tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
791 tmp_reg64.s32.h += absDiffSignal;
792 tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
793 acc = tmp_reg64.s32.h + (1 << 2);
794 acc >>= 3;
795 if (tmp_round0 == 0x40000000L) {
796 acc--;
797 }
798
799 tmp_qCode = qdata_pt->qCode;
800 tmp_altQcode = tmp_qCode - 1;
801 /* Check the sign of the distance penalty. Get the sign from the
802 * full-precision accumulator, as done in the Kalimba code. */
803
804 if (tmp_reg64.s32.h < 0) {
805 /* The distance is -ve. The optimum code needs decremented by 1 and the
806 * alternative code is 1 greater than this. Get the rounded version of the
807 * -ve distance penalty and negate this (form distance magnitude) before
808 * writing the value out */
809 tmp_qCode = tmp_altQcode;
810 tmp_altQcode++;
811 acc = -acc;
812 }
813
814 qdata_pt->distPenalty = acc;
815 /* If the difference signal is negative, bitwise invert the code (restores
816 * sign to the magnitude). */
817 if (diffSignal < 0) {
818 tmp_qCode = ~tmp_qCode;
819 tmp_altQcode = ~tmp_altQcode;
820 }
821 qdata_pt->altQcode = tmp_altQcode;
822 qdata_pt->qCode = tmp_qCode;
823 }
824