1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "AptxParameters.h"
17 #include "AptxTables.h"
18 #include "Quantiser.h"
19 
BsearchLL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)20 XBT_INLINE_ int32_t BsearchLL(const int32_t absDiffSignalShifted, const int32_t delta,
21                               const int32_t* dqbitTablePrt) {
22   int32_t qCode;
23   reg64_t tmp_acc;
24   int32_t tmp;
25   int32_t lc_delta = delta << 8;
26 
27   qCode = 0;
28 
29   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[32];
30   tmp_acc.s32.h -= absDiffSignalShifted;
31   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
32   if (tmp <= 0) {
33     qCode = 32;
34   }
35 
36   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 16];
37   tmp_acc.s32.h -= absDiffSignalShifted;
38   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
39   if (tmp <= 0) {
40     qCode += 16;
41   }
42   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
43   tmp_acc.s32.h -= absDiffSignalShifted;
44   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
45   if (tmp <= 0) {
46     qCode += 8;
47   }
48 
49   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
50   tmp_acc.s32.h -= absDiffSignalShifted;
51   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
52   if (tmp <= 0) {
53     qCode += 4;
54   }
55 
56   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
57   tmp_acc.s32.h -= absDiffSignalShifted;
58   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
59   if (tmp <= 0) {
60     qCode += 2;
61   }
62 
63   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
64   tmp_acc.s32.h -= absDiffSignalShifted;
65   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
66   if (tmp <= 0) {
67     qCode++;
68   }
69 
70   return qCode;
71 }
72 
BsearchHL(const int32_t absDiffSignalShifted,const int32_t delta)73 XBT_INLINE_ int32_t BsearchHL(const int32_t absDiffSignalShifted, const int32_t delta) {
74   reg64_t tmp_acc;
75   int32_t lc_delta = delta << 8;
76 
77   /* first iteration */
78   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)(97040 << 1);
79   tmp_acc.s32.h -= absDiffSignalShifted;
80   return tmp_acc.s64 <= 0;
81 }
82 
BsearchHH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)83 XBT_INLINE_ int32_t BsearchHH(const int32_t absDiffSignalShifted, const int32_t delta,
84                               const int32_t* dqbitTablePrt) {
85   int32_t qCode;
86   reg64_t tmp_acc;
87   int32_t tmp;
88   int32_t lc_delta = delta << 8;
89   qCode = 0;
90 
91   /* first iteration */
92   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[2];
93   tmp_acc.s32.h -= absDiffSignalShifted;
94   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
95   if (tmp <= 0) {
96     qCode += 2;
97   }
98   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
99   tmp_acc.s32.h -= absDiffSignalShifted;
100   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
101   if (tmp <= 0) {
102     qCode++;
103   }
104 
105   return qCode;
106 }
107 
BsearchLH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)108 XBT_INLINE_ int32_t BsearchLH(const int32_t absDiffSignalShifted, const int32_t delta,
109                               const int32_t* dqbitTablePrt) {
110   int32_t qCode;
111   reg64_t tmp_acc;
112   int32_t tmp;
113   int32_t lc_delta = delta << 8;
114 
115   /* first iteration */
116   qCode = 0;
117 
118   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[4];
119   tmp_acc.s32.h -= absDiffSignalShifted;
120   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
121   if (tmp <= 0) {
122     qCode = 4;
123   }
124 
125   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
126   tmp_acc.s32.h -= absDiffSignalShifted;
127   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
128   if (tmp <= 0) {
129     qCode += 2;
130   }
131   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
132   tmp_acc.s32.h -= absDiffSignalShifted;
133   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
134   if (tmp <= 0) {
135     qCode++;
136   }
137 
138   return qCode;
139 }
140 
quantiseDifferenceHL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)141 void quantiseDifferenceHL(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
142                           Quantiser_data* qdata_pt) {
143   int32_t absDiffSignal;
144   int32_t absDiffSignalShifted;
145   int32_t index;
146   int32_t dithSquared;
147   int32_t minusLambdaD;
148   int32_t acc;
149   int32_t threshDiff;
150   reg64_t tmp_acc;
151   reg64_t tmp_reg64;
152   int32_t tmp_accL;
153   int32_t tmp_qCode;
154   int32_t tmp_altQcode;
155   uint32_t tmp_round0;
156   int32_t _delta;
157 
158   /* Form the absolute value of the difference signal and maintain a version
159    * that is right-shifted 4 places for delta scaling. */
160   absDiffSignal = -diffSignal;
161   if (diffSignal >= 0) {
162     absDiffSignal = diffSignal;
163   }
164   absDiffSignal = ssat24(absDiffSignal);
165   absDiffSignalShifted = absDiffSignal >> deltaScale;
166   absDiffSignalShifted = ssat24(absDiffSignalShifted);
167 
168   /* Binary search for the quantised code. This search terminates with the
169    * table index of the LARGEST threshold table value for which
170    * absDiffSignalShifted >= (delta * threshold)
171    */
172   index = BsearchHL(absDiffSignalShifted, delta);
173 
174   /* We actually wanted the SMALLEST magnitude quantised code for which
175    * absDiffSignalShifted < (delta * threshold)
176    * i.e. the code with the next highest magnitude than the one we actually
177    * found. We could add +1 to the code magnitude to do this, but we need to
178    * subtract 1 from the code magnitude to compensate for the "phantom
179    * element" at the base of the quantisation table. These two effects cancel
180    * out, so we leave the value of code alone. However, we need to form code+1
181    * to get the proper index into the both the threshold and dither tables,
182    * since we must skip over the phantom element at the base. */
183   qdata_pt->qCode = index;
184 
185   /* Square the dither and get the value back from the ALU
186    * (saturated/rounded). */
187   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
188 
189   acc = tmp_acc.s32.h;
190 
191   tmp_round0 = (uint32_t)acc << 8;
192 
193   acc = (acc >> 6) + 1;
194   acc >>= 1;
195   if (tmp_round0 == 0x40000000L) {
196     acc--;
197   }
198 
199   acc = ssat24(acc);
200 
201   dithSquared = acc;
202 
203   /* Form the negative difference of the dither values at index and index-1.
204    * Load the accumulator with this value divided by 2. Ensure saturation is
205    * applied to the difference calculation. */
206   minusLambdaD = qdata_pt->minusLambdaDTable[index];
207 
208   tmp_accL = (1 << 23) - dithSquared;
209   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
210 
211   tmp_round0 = tmp_acc.s32.l << 8;
212 
213   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
214   acc++;
215   acc >>= 1;
216   if (tmp_round0 == 0x40000000L) {
217     acc--;
218   }
219 
220   // worst case value for acc = 0x000d3e08
221   // no saturation required
222 
223   /* Add the threshold table values at index and index-1 to the accumulated
224    * value. */
225   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
226   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
227   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
228   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
229 
230   /* Form the threshold table difference at index and index-1. Ensure
231    * saturation is applied to the difference calculation. */
232   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
233 
234   /* Based on the sign of the difference signal, either add or subtract the
235    * threshold table difference from the accumulated value. Recover the final
236    * accumulated value (saturated/rounded) */
237   if (diffSignal < 0) {
238     threshDiff = -threshDiff;
239   }
240   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
241 
242   tmp_reg64.s32.h += acc;
243   acc = tmp_reg64.s32.h;
244 
245   if (tmp_reg64.u32.l >= 0x80000000) {
246     acc++;
247   }
248   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
249 
250   acc = ssat24(acc);
251 
252   if (tmp_round0 == 0x40000000L) {
253     acc--;
254   }
255   _delta = -delta << 8;
256 
257   acc = (int32_t)((uint32_t)acc << 4);
258 
259   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
260    * signal used to determine if dithering alters the quantised code value or
261    * not. */
262   // worst case value for delta is 0x7d400
263   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
264   tmp_reg64.s32.h += absDiffSignal;
265   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
266   acc = tmp_reg64.s32.h + (1 << 2);
267   acc >>= 3;
268   if (tmp_round0 == 0x40000000L) {
269     acc--;
270   }
271 
272   tmp_qCode = qdata_pt->qCode;
273   tmp_altQcode = tmp_qCode - 1;
274   /* Check the sign of the distance penalty. Get the sign from the
275    * full-precision accumulator, as done in the Kalimba code. */
276   if (tmp_reg64.s32.h < 0) {
277     /* The distance is -ve. The optimum code needs decremented by 1 and the
278      * alternative code is 1 greater than this. Get the rounded version of the
279      * -ve distance penalty and negate this (form distance magnitude) before
280      *  writing the value out */
281     tmp_qCode = tmp_altQcode;
282     tmp_altQcode++;
283     acc = -acc;
284   }
285 
286   qdata_pt->distPenalty = acc;
287   /* If the difference signal is negative, bitwise invert the code (restores
288    * sign to the magnitude). */
289   if (diffSignal < 0) {
290     tmp_qCode = ~tmp_qCode;
291     tmp_altQcode = ~tmp_altQcode;
292   }
293   qdata_pt->altQcode = tmp_altQcode;
294   qdata_pt->qCode = tmp_qCode;
295 }
296 
quantiseDifferenceHH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)297 void quantiseDifferenceHH(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
298                           Quantiser_data* qdata_pt) {
299   int32_t absDiffSignal;
300   int32_t absDiffSignalShifted;
301   int32_t index;
302   int32_t dithSquared;
303   int32_t minusLambdaD;
304   int32_t acc;
305   int32_t threshDiff;
306   reg64_t tmp_acc;
307   reg64_t tmp_reg64;
308   int32_t tmp_accL;
309   int32_t tmp_qCode;
310   int32_t tmp_altQcode;
311   uint32_t tmp_round0;
312   int32_t _delta;
313 
314   /* Form the absolute value of the difference signal and maintain a version
315    * that is right-shifted 4 places for delta scaling. */
316   absDiffSignal = -diffSignal;
317   if (diffSignal >= 0) {
318     absDiffSignal = diffSignal;
319   }
320   absDiffSignal = ssat24(absDiffSignal);
321   absDiffSignalShifted = absDiffSignal >> deltaScale;
322   absDiffSignalShifted = ssat24(absDiffSignalShifted);
323 
324   /* Binary search for the quantised code. This search terminates with the
325    * table index of the LARGEST threshold table value for which
326    * absDiffSignalShifted >= (delta * threshold)
327    */
328   index = BsearchHH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
329 
330   /* We actually wanted the SMALLEST magnitude quantised code for which
331    * absDiffSignalShifted < (delta * threshold)
332    * i.e. the code with the next highest magnitude than the one we actually
333    * found. We could add +1 to the code magnitude to do this, but we need to
334    * subtract 1 from the code magnitude to compensate for the "phantom
335    * element" at the base of the quantisation table. These two effects cancel
336    * out, so we leave the value of code alone. However, we need to form code+1
337    * to get the proper index into the both the threshold and dither tables,
338    * since we must skip over the phantom element at the base. */
339   qdata_pt->qCode = index;
340 
341   /* Square the dither and get the value back from the ALU
342    * (saturated/rounded). */
343   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
344 
345   acc = tmp_acc.s32.h;
346 
347   tmp_round0 = (uint32_t)acc << 8;
348 
349   acc = (acc >> 6) + 1;
350   acc >>= 1;
351   if (tmp_round0 == 0x40000000L) {
352     acc--;
353   }
354 
355   acc = ssat24(acc);
356 
357   dithSquared = acc;
358 
359   /* Form the negative difference of the dither values at index and index-1.
360    * Load the accumulator with this value divided by 2. Ensure saturation is
361    * applied to the difference calculation. */
362   minusLambdaD = qdata_pt->minusLambdaDTable[index];
363 
364   tmp_accL = (1 << 23) - dithSquared;
365   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
366 
367   tmp_round0 = tmp_acc.s32.l << 8;
368 
369   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
370   acc++;
371   acc >>= 1;
372   if (tmp_round0 == 0x40000000L) {
373     acc--;
374   }
375 
376   // worst case value for acc = 0x000d3e08
377   // no saturation required
378 
379   /* Add the threshold table values at index and index-1 to the accumulated
380    * value. */
381   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
382   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
383   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
384   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
385 
386   /* Form the threshold table difference at index and index-1. Ensure
387    * saturation is applied to the difference calculation. */
388   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
389 
390   /* Based on the sign of the difference signal, either add or subtract the
391    * threshold table difference from the accumulated value. Recover the final
392    * accumulated value (saturated/rounded) */
393   if (diffSignal < 0) {
394     threshDiff = -threshDiff;
395   }
396   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
397   tmp_reg64.s32.h += acc;
398   acc = tmp_reg64.s32.h;
399 
400   if (tmp_reg64.u32.l >= 0x80000000) {
401     acc++;
402   }
403   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
404 
405   acc = ssat24(acc);
406 
407   if (tmp_round0 == 0x40000000L) {
408     acc--;
409   }
410   _delta = -delta << 8;
411 
412   acc = (int32_t)((uint32_t)acc << 4);
413 
414   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
415    * signal used to determine if dithering alters the quantised code value or
416    * not. */
417   // worst case value for delta is 0x7d400
418   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
419   tmp_reg64.s32.h += absDiffSignal;
420   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
421   acc = tmp_reg64.s32.h + (1 << 2);
422   acc >>= 3;
423   if (tmp_round0 == 0x40000000L) {
424     acc--;
425   }
426 
427   tmp_qCode = qdata_pt->qCode;
428   tmp_altQcode = tmp_qCode - 1;
429   /* Check the sign of the distance penalty. Get the sign from the
430    * full-precision accumulator, as done in the Kalimba code. */
431   if (tmp_reg64.s32.h < 0) {
432     /* The distance is -ve. The optimum code needs decremented by 1 and the
433      * alternative code is 1 greater than this. Get the rounded version of the
434      * -ve distance penalty and negate this (form distance magnitude) before
435      *  writing the value out */
436     tmp_qCode = tmp_altQcode;
437     tmp_altQcode++;
438     acc = -acc;
439   }
440 
441   qdata_pt->distPenalty = acc;
442   /* If the difference signal is negative, bitwise invert the code (restores
443    * sign to the magnitude). */
444   if (diffSignal < 0) {
445     tmp_qCode = ~tmp_qCode;
446     tmp_altQcode = ~tmp_altQcode;
447   }
448   qdata_pt->altQcode = tmp_altQcode;
449   qdata_pt->qCode = tmp_qCode;
450 }
451 
quantiseDifferenceLL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)452 void quantiseDifferenceLL(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
453                           Quantiser_data* qdata_pt) {
454   int32_t absDiffSignal;
455   int32_t absDiffSignalShifted;
456   int32_t index;
457   int32_t dithSquared;
458   int32_t minusLambdaD;
459   int32_t acc;
460   int32_t threshDiff;
461   reg64_t tmp_acc;
462   reg64_t tmp_reg64;
463   int32_t tmp_accL;
464   int32_t tmp_qCode;
465   int32_t tmp_altQcode;
466   uint32_t tmp_round0;
467   int32_t _delta;
468 
469   /* Form the absolute value of the difference signal and maintain a version
470    * that is right-shifted 4 places for delta scaling. */
471   absDiffSignal = -diffSignal;
472   if (diffSignal >= 0) {
473     absDiffSignal = diffSignal;
474   }
475   absDiffSignal = ssat24(absDiffSignal);
476   absDiffSignalShifted = absDiffSignal >> deltaScale;
477 
478   /* Binary search for the quantised code. This search terminates with the
479    * table index of the LARGEST threshold table value for which
480    * absDiffSignalShifted >= (delta * threshold)
481    */
482   index = BsearchLL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
483 
484   /* We actually wanted the SMALLEST magnitude quantised code for which
485    * absDiffSignalShifted < (delta * threshold)
486    * i.e. the code with the next highest magnitude than the one we actually
487    * found. We could add +1 to the code magnitude to do this, but we need to
488    * subtract 1 from the code magnitude to compensate for the "phantom
489    * element" at the base of the quantisation table. These two effects cancel
490    * out, so we leave the value of code alone. However, we need to form code+1
491    * to get the proper index into the both the threshold and dither tables,
492    * since we must skip over the phantom element at the base. */
493   qdata_pt->qCode = index;
494 
495   /* Square the dither and get the value back from the ALU
496    * (saturated/rounded). */
497   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
498 
499   acc = tmp_acc.s32.h;
500 
501   tmp_round0 = (uint32_t)acc << 8;
502 
503   acc = (acc >> 6) + 1;
504   acc >>= 1;
505   if (tmp_round0 == 0x40000000L) {
506     acc--;
507   }
508 
509   acc = ssat24(acc);
510 
511   dithSquared = acc;
512 
513   /* Form the negative difference of the dither values at index and index-1.
514    * Load the accumulator with this value divided by 2. Ensure saturation is
515    * applied to the difference calculation. */
516   minusLambdaD = qdata_pt->minusLambdaDTable[index];
517 
518   tmp_accL = (1 << 23) - dithSquared;
519   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
520 
521   tmp_round0 = tmp_acc.s32.l << 8;
522 
523   tmp_acc.s64 >>= 22;
524   acc = tmp_acc.s32.l;
525   acc++;
526   acc >>= 1;
527   if (tmp_round0 == 0x40000000L) {
528     acc--;
529   }
530 
531   // worst case value for acc = 0x000d3e08
532   // no saturation required
533 
534   /* Add the threshold table values at index and index-1 to the accumulated
535    * value. */
536   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
537   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
538   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
539   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
540 
541   /* Form the threshold table difference at index and index-1. Ensure
542    * saturation is applied to the difference calculation. */
543   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
544 
545   /* Based on the sign of the difference signal, either add or subtract the
546    * threshold table difference from the accumulated value. Recover the final
547    * accumulated value (saturated/rounded) */
548   if (diffSignal < 0) {
549     threshDiff = -threshDiff;
550   }
551   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
552   tmp_reg64.s32.h += acc;
553   acc = tmp_reg64.s32.h;
554 
555   if (tmp_reg64.u32.l >= 0x80000000) {
556     acc++;
557   }
558   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
559 
560   acc = ssat24(acc);
561 
562   if (tmp_round0 == 0x40000000L) {
563     acc--;
564   }
565   _delta = -delta << 8;
566 
567   acc = (int32_t)((uint32_t)acc << 4);
568 
569   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
570    * signal used to determine if dithering alters the quantised code value or
571    * not. */
572   // worst case value for delta is 0x7d400
573 
574   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
575   tmp_reg64.s32.h += absDiffSignal;
576   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
577   acc = tmp_reg64.s32.h + (1 << 2);
578   acc >>= 3;
579   if (tmp_round0 == 0x40000000L) {
580     acc--;
581   }
582 
583   tmp_qCode = qdata_pt->qCode;
584   tmp_altQcode = tmp_qCode - 1;
585   /* Check the sign of the distance penalty. Get the sign from the
586    * full-precision accumulator, as done in the Kalimba code. */
587   if (tmp_reg64.s32.h < 0) {
588     /* The distance is -ve. The optimum code needs decremented by 1 and the
589      * alternative code is 1 greater than this. Get the rounded version of the
590      * -ve distance penalty and negate this (form distance magnitude) before
591      *  writing the value out */
592     tmp_qCode = tmp_altQcode;
593     tmp_altQcode++;
594     acc = -acc;
595   }
596 
597   qdata_pt->distPenalty = acc;
598   /* If the difference signal is negative, bitwise invert the code (restores
599    * sign to the magnitude). */
600   if (diffSignal < 0) {
601     tmp_qCode = ~tmp_qCode;
602     tmp_altQcode = ~tmp_altQcode;
603   }
604   qdata_pt->altQcode = tmp_altQcode;
605   qdata_pt->qCode = tmp_qCode;
606 }
607 
quantiseDifferenceLH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)608 void quantiseDifferenceLH(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
609                           Quantiser_data* qdata_pt) {
610   int32_t absDiffSignal;
611   int32_t absDiffSignalShifted;
612   int32_t index;
613   int32_t dithSquared;
614   int32_t minusLambdaD;
615   int32_t acc;
616   int32_t threshDiff;
617   reg64_t tmp_acc;
618   reg64_t tmp_reg64;
619   int32_t tmp_accL;
620   int32_t tmp_qCode;
621   int32_t tmp_altQcode;
622   uint32_t tmp_round0;
623   int32_t _delta;
624 
625   /* Form the absolute value of the difference signal and maintain a version
626    * that is right-shifted 4 places for delta scaling. */
627   absDiffSignal = -diffSignal;
628   if (diffSignal >= 0) {
629     absDiffSignal = diffSignal;
630   }
631   absDiffSignal = ssat24(absDiffSignal);
632   absDiffSignalShifted = absDiffSignal >> deltaScale;
633 
634   /* Binary search for the quantised code. This search terminates with the
635    * table index of the LARGEST threshold table value for which
636    * absDiffSignalShifted >= (delta * threshold)
637    */
638   index = BsearchLH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
639 
640   /* We actually wanted the SMALLEST magnitude quantised code for which
641    * absDiffSignalShifted < (delta * threshold)
642    * i.e. the code with the next highest magnitude than the one we actually
643    * found. We could add +1 to the code magnitude to do this, but we need to
644    * subtract 1 from the code magnitude to compensate for the "phantom
645    * element" at the base of the quantisation table. These two effects cancel
646    * out, so we leave the value of code alone. However, we need to form code+1
647    * to get the proper index into the both the threshold and dither tables,
648    * since we must skip over the phantom element at the base. */
649   qdata_pt->qCode = index;
650 
651   /* Square the dither and get the value back from the ALU
652    * (saturated/rounded). */
653   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
654 
655   acc = tmp_reg64.s32.h;
656 
657   tmp_round0 = (uint32_t)acc << 8;
658 
659   acc = (acc >> 6) + 1;
660   acc >>= 1;
661   if (tmp_round0 == 0x40000000L) {
662     acc--;
663   }
664 
665   acc = ssat24(acc);
666 
667   dithSquared = acc;
668 
669   /* Form the negative difference of the dither values at index and index-1.
670    * Load the accumulator with this value divided by 2. Ensure saturation is
671    * applied to the difference calculation. */
672   minusLambdaD = qdata_pt->minusLambdaDTable[index];
673 
674   tmp_accL = (1 << 23) - dithSquared;
675   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
676 
677   tmp_round0 = tmp_acc.s32.l << 8;
678 
679   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
680   if (tmp_round0 == 0x40000000L) {
681     acc -= 2;
682   }
683   acc++;
684 
685   // worst case value for acc = 0x000d3e08
686   // no saturation required
687   /* Add the threshold table values at index and index-1 to the accumulated
688    * value. */
689   acc += qdata_pt->thresholdTablePtr_sl1[index + 1];
690   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
691   acc += qdata_pt->thresholdTablePtr_sl1[index];
692   acc >>= 1;
693 
694   /* Form the threshold table difference at index and index-1. Ensure
695    * saturation is applied to the difference calculation. */
696   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
697 
698   /* Based on the sign of the difference signal, either add or subtract the
699    * threshold table difference from the accumulated value. Recover the final
700    * accumulated value (saturated/rounded) */
701   if (diffSignal < 0) {
702     threshDiff = -threshDiff;
703   }
704   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
705 
706   tmp_reg64.s32.h += acc;
707   acc = tmp_reg64.s32.h;
708 
709   if (tmp_reg64.u32.l >= 0x80000000) {
710     acc++;
711   }
712   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
713 
714   acc = ssat24(acc);
715 
716   if (tmp_round0 == 0x40000000L) {
717     acc--;
718   }
719   _delta = -delta << 8;
720 
721   acc = (int32_t)((uint32_t)acc << 4);
722 
723   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
724    * signal used to determine if dithering alters the quantised code value or
725    * not. */
726   // worst case value for delta is 0x7d400
727   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
728   tmp_reg64.s32.h += absDiffSignal;
729   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
730   acc = tmp_reg64.s32.h + (1 << 2);
731   acc >>= 3;
732   if (tmp_round0 == 0x40000000L) {
733     acc--;
734   }
735 
736   tmp_qCode = qdata_pt->qCode;
737   tmp_altQcode = tmp_qCode - 1;
738   /* Check the sign of the distance penalty. Get the sign from the
739    * full-precision accumulator, as done in the Kalimba code. */
740 
741   if (tmp_reg64.s32.h < 0) {
742     /* The distance is -ve. The optimum code needs decremented by 1 and the
743      * alternative code is 1 greater than this. Get the rounded version of the
744      * -ve distance penalty and negate this (form distance magnitude) before
745      *  writing the value out */
746     tmp_qCode = tmp_altQcode;
747     tmp_altQcode++;
748     acc = -acc;
749   }
750 
751   qdata_pt->distPenalty = acc;
752   /* If the difference signal is negative, bitwise invert the code (restores
753    * sign to the magnitude). */
754   if (diffSignal < 0) {
755     tmp_qCode = ~tmp_qCode;
756     tmp_altQcode = ~tmp_altQcode;
757   }
758   qdata_pt->altQcode = tmp_altQcode;
759   qdata_pt->qCode = tmp_qCode;
760 }
761