1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Quantiser.h"
18 
BsearchLL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)19 XBT_INLINE_ int32_t BsearchLL(const int32_t absDiffSignalShifted, const int32_t delta,
20                               const int32_t* dqbitTablePrt) {
21   int32_t qCode = 0;
22   reg64_t tmp_acc;
23   int32_t tmp = 0;
24   int32_t lc_delta = delta << 8;
25 
26   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[128];
27   tmp_acc.s32.h -= absDiffSignalShifted;
28   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
29   if (tmp <= 0) {
30     qCode = 128;
31   }
32 
33   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 64];
34   tmp_acc.s32.h -= absDiffSignalShifted;
35   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
36   if (tmp <= 0) {
37     qCode += 64;
38   }
39 
40   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 32];
41   tmp_acc.s32.h -= absDiffSignalShifted;
42   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
43   if (tmp <= 0) {
44     qCode += 32;
45   }
46 
47   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 16];
48   tmp_acc.s32.h -= absDiffSignalShifted;
49   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
50   if (tmp <= 0) {
51     qCode += 16;
52   }
53   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
54   tmp_acc.s32.h -= absDiffSignalShifted;
55   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
56   if (tmp <= 0) {
57     qCode += 8;
58   }
59 
60   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
61   tmp_acc.s32.h -= absDiffSignalShifted;
62   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
63   if (tmp <= 0) {
64     qCode += 4;
65   }
66 
67   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
68   tmp_acc.s32.h -= absDiffSignalShifted;
69   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
70   if (tmp <= 0) {
71     qCode += 2;
72   }
73 
74   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
75   tmp_acc.s32.h -= absDiffSignalShifted;
76   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
77   if (tmp <= 0) {
78     qCode++;
79   }
80 
81   return qCode;
82 }
83 
BsearchHL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)84 XBT_INLINE_ int32_t BsearchHL(const int32_t absDiffSignalShifted, const int32_t delta,
85                               const int32_t* dqbitTablePrt) {
86   int32_t qCode = 0;
87   reg64_t tmp_acc;
88   int32_t tmp = 0;
89   int32_t lc_delta = delta << 8;
90 
91   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[4];
92   tmp_acc.s32.h -= absDiffSignalShifted;
93   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
94   if (tmp <= 0) {
95     qCode = 4;
96   }
97 
98   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
99   tmp_acc.s32.h -= absDiffSignalShifted;
100   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
101   if (tmp <= 0) {
102     qCode += 2;
103   }
104 
105   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
106   tmp_acc.s32.h -= absDiffSignalShifted;
107   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
108   if (tmp <= 0) {
109     qCode++;
110   }
111 
112   return qCode;
113 }
114 
BsearchHH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)115 XBT_INLINE_ int32_t BsearchHH(const int32_t absDiffSignalShifted, const int32_t delta,
116                               const int32_t* dqbitTablePrt) {
117   int32_t qCode = 0;
118   reg64_t tmp_acc;
119   int32_t tmp = 0;
120   int32_t lc_delta = delta << 8;
121 
122   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[8];
123   tmp_acc.s32.h -= absDiffSignalShifted;
124   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
125   if (tmp <= 0) {
126     qCode = 8;
127   }
128 
129   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
130   tmp_acc.s32.h -= absDiffSignalShifted;
131   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
132   if (tmp <= 0) {
133     qCode += 4;
134   }
135 
136   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
137   tmp_acc.s32.h -= absDiffSignalShifted;
138   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
139   if (tmp <= 0) {
140     qCode += 2;
141   }
142 
143   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
144   tmp_acc.s32.h -= absDiffSignalShifted;
145   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
146   if (tmp <= 0) {
147     qCode++;
148   }
149 
150   return qCode;
151 }
152 
quantiseDifference_HDHL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)153 void quantiseDifference_HDHL(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
154                              Quantiser_data* qdata_pt) {
155   int32_t absDiffSignal = 0;
156   int32_t absDiffSignalShifted = 0;
157   int32_t index = 0;
158   int32_t dithSquared = 0;
159   int32_t minusLambdaD = 0;
160   int32_t acc = 0;
161   int32_t threshDiff = 0;
162   reg64_t tmp_acc;
163   reg64_t tmp_reg64;
164   int32_t tmp_accL = 0;
165   int32_t tmp_qCode = 0;
166   int32_t tmp_altQcode = 0;
167   uint32_t tmp_round0 = 0;
168   int32_t _delta = 0;
169 
170   /* Form the absolute value of the difference signal and maintain a version
171    * that is right-shifted 4 places for delta scaling. */
172   absDiffSignal = -diffSignal;
173   if (diffSignal >= 0) {
174     absDiffSignal = diffSignal;
175   }
176   absDiffSignal = ssat24(absDiffSignal);
177   absDiffSignalShifted = absDiffSignal >> deltaScale;
178 
179   /* Binary search for the quantised code. This search terminates with the
180    * table index of the LARGEST threshold table value for which
181    * absDiffSignalShifted >= (delta * threshold)
182    */
183   index = BsearchHL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
184 
185   /* We actually wanted the SMALLEST magnitude quantised code for which
186    * absDiffSignalShifted < (delta * threshold)
187    * i.e. the code with the next highest magnitude than the one we actually
188    * found. We could add +1 to the code magnitude to do this, but we need to
189    * subtract 1 from the code magnitude to compensate for the "phantom
190    * element" at the base of the quantisation table. These two effects cancel
191    * out, so we leave the value of code alone. However, we need to form code+1
192    * to get the proper index into the both the threshold and dither tables,
193    * since we must skip over the phantom element at the base. */
194   qdata_pt->qCode = index;
195 
196   /* Square the dither and get the value back from the ALU
197    * (saturated/rounded). */
198   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
199 
200   acc = tmp_acc.s32.h;
201   tmp_round0 = (uint32_t)acc << 8;
202 
203   acc = (acc >> 6) + 1;
204   acc >>= 1;
205   if (tmp_round0 == 0x40000000L) {
206     acc--;
207   }
208   acc = ssat24(acc);
209 
210   dithSquared = acc;
211 
212   /* Form the negative difference of the dither values at index and index-1.
213    * Load the accumulator with this value divided by 2. Ensure saturation is
214    * applied to the difference calculation. */
215   minusLambdaD = qdata_pt->minusLambdaDTable[index];
216 
217   tmp_accL = (1 << 23) - dithSquared;
218   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
219 
220   tmp_round0 = tmp_acc.s32.l << 8;
221 
222   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
223   acc++;
224   acc >>= 1;
225   if (tmp_round0 == 0x40000000L) {
226     acc--;
227   }
228 
229   /* Add the threshold table values at index and index-1 to the accumulated
230    * value. */
231   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
232   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
233   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
234   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
235 
236   // saturation required
237   acc = ssat24(acc);
238 
239   /* Form the threshold table difference at index and index-1. Ensure
240    * saturation is applied to the difference calculation. */
241   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
242 
243   /* Based on the sign of the difference signal, either add or subtract the
244    * threshold table difference from the accumulated value. Recover the final
245    * accumulated value (saturated/rounded) */
246   if (diffSignal < 0) {
247     threshDiff = -threshDiff;
248   }
249   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
250 
251   tmp_reg64.s32.h += acc;
252   acc = tmp_reg64.s32.h;
253 
254   if (tmp_reg64.u32.l >= 0x80000000) {
255     acc++;
256   }
257   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
258 
259   acc = ssat24(acc);
260 
261   if (tmp_round0 == 0x40000000L) {
262     acc--;
263   }
264   _delta = -delta << 8;
265 
266   acc = (int32_t)((uint32_t)acc << 4);
267 
268   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
269    * signal used to determine if dithering alters the quantised code value or
270    * not. */
271   // worst case value for delta is 0x7d400
272   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
273   tmp_reg64.s32.h += absDiffSignal;
274   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
275   acc = tmp_reg64.s32.h + (1 << 2);
276   acc >>= 3;
277   if (tmp_round0 == 0x40000000L) {
278     acc--;
279   }
280 
281   tmp_qCode = qdata_pt->qCode;
282   tmp_altQcode = tmp_qCode - 1;
283   /* Check the sign of the distance penalty. Get the sign from the
284    * full-precision accumulator, as done in the Kalimba code. */
285   if (tmp_reg64.s32.h < 0) {
286     /* The distance is -ve. The optimum code needs decremented by 1 and the
287      * alternative code is 1 greater than this. Get the rounded version of the
288      * -ve distance penalty and negate this (form distance magnitude) before
289      *  writing the value out */
290     tmp_qCode = tmp_altQcode;
291     tmp_altQcode++;
292     acc = -acc;
293   }
294 
295   qdata_pt->distPenalty = acc;
296   /* If the difference signal is negative, bitwise invert the code (restores
297    * sign to the magnitude). */
298   if (diffSignal < 0) {
299     tmp_qCode = ~tmp_qCode;
300     tmp_altQcode = ~tmp_altQcode;
301   }
302   qdata_pt->altQcode = tmp_altQcode;
303   qdata_pt->qCode = tmp_qCode;
304 }
305 
quantiseDifference_HDHH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)306 void quantiseDifference_HDHH(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
307                              Quantiser_data* qdata_pt) {
308   int32_t absDiffSignal;
309   int32_t absDiffSignalShifted;
310   int32_t index;
311   int32_t dithSquared;
312   int32_t minusLambdaD;
313   int32_t acc;
314   int32_t threshDiff;
315   reg64_t tmp_acc;
316   reg64_t tmp_reg64;
317   int32_t tmp_accL;
318   int32_t tmp_qCode;
319   int32_t tmp_altQcode;
320   uint32_t tmp_round0;
321   int32_t _delta;
322 
323   /* Form the absolute value of the difference signal and maintain a version
324    * that is right-shifted 4 places for delta scaling. */
325   absDiffSignal = -diffSignal;
326   if (diffSignal >= 0) {
327     absDiffSignal = diffSignal;
328   }
329   absDiffSignal = ssat24(absDiffSignal);
330   absDiffSignalShifted = absDiffSignal >> deltaScale;
331 
332   /* Binary search for the quantised code. This search terminates with the
333    * table index of the LARGEST threshold table value for which
334    * absDiffSignalShifted >= (delta * threshold)
335    */
336   index = BsearchHH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
337 
338   /* We actually wanted the SMALLEST magnitude quantised code for which
339    * absDiffSignalShifted < (delta * threshold)
340    * i.e. the code with the next highest magnitude than the one we actually
341    * found. We could add +1 to the code magnitude to do this, but we need to
342    * subtract 1 from the code magnitude to compensate for the "phantom
343    * element" at the base of the quantisation table. These two effects cancel
344    * out, so we leave the value of code alone. However, we need to form code+1
345    * to get the proper index into the both the threshold and dither tables,
346    * since we must skip over the phantom element at the base. */
347   qdata_pt->qCode = index;
348 
349   /* Square the dither and get the value back from the ALU
350    * (saturated/rounded). */
351   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
352 
353   acc = tmp_acc.s32.h;
354   tmp_round0 = (uint32_t)acc << 8;
355 
356   acc = (acc >> 6) + 1;
357   acc >>= 1;
358   if (tmp_round0 == 0x40000000L) {
359     acc--;
360   }
361 
362   acc = ssat24(acc);
363 
364   dithSquared = acc;
365 
366   /* Form the negative difference of the dither values at index and index-1.
367    * Load the accumulator with this value divided by 2. Ensure saturation is
368    * applied to the difference calculation. */
369   minusLambdaD = qdata_pt->minusLambdaDTable[index];
370 
371   tmp_accL = (1 << 23) - dithSquared;
372   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
373 
374   tmp_round0 = tmp_acc.s32.l << 8;
375 
376   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
377   acc++;
378   acc >>= 1;
379   if (tmp_round0 == 0x40000000L) {
380     acc--;
381   }
382 
383   /* Add the threshold table values at index and index-1 to the accumulated
384    * value. */
385   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
386   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
387   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
388   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
389 
390   // saturation required
391   acc = ssat24(acc);
392 
393   /* Form the threshold table difference at index and index-1. Ensure
394    * saturation is applied to the difference calculation. */
395   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
396 
397   /* Based on the sign of the difference signal, either add or subtract the
398    * threshold table difference from the accumulated value. Recover the final
399    * accumulated value (saturated/rounded) */
400   if (diffSignal < 0) {
401     threshDiff = -threshDiff;
402   }
403   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
404   tmp_reg64.s32.h += acc;
405   acc = tmp_reg64.s32.h;
406 
407   if (tmp_reg64.u32.l >= 0x80000000) {
408     acc++;
409   }
410   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
411 
412   acc = ssat24(acc);
413 
414   if (tmp_round0 == 0x40000000L) {
415     acc--;
416   }
417   _delta = -delta << 8;
418 
419   acc = (int32_t)((uint32_t)acc << 4);
420 
421   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
422    * signal used to determine if dithering alters the quantised code value or
423    * not. */
424   // worst case value for delta is 0x7d400
425   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
426   tmp_reg64.s32.h += absDiffSignal;
427   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
428   acc = tmp_reg64.s32.h + (1 << 2);
429   acc >>= 3;
430   if (tmp_round0 == 0x40000000L) {
431     acc--;
432   }
433 
434   tmp_qCode = qdata_pt->qCode;
435   tmp_altQcode = tmp_qCode - 1;
436   /* Check the sign of the distance penalty. Get the sign from the
437    * full-precision accumulator, as done in the Kalimba code. */
438   if (tmp_reg64.s32.h < 0) {
439     /* The distance is -ve. The optimum code needs decremented by 1 and the
440      * alternative code is 1 greater than this. Get the rounded version of the
441      * -ve distance penalty and negate this (form distance magnitude) before
442      *  writing the value out */
443     tmp_qCode = tmp_altQcode;
444     tmp_altQcode++;
445     acc = -acc;
446   }
447 
448   qdata_pt->distPenalty = acc;
449   /* If the difference signal is negative, bitwise invert the code (restores
450    * sign to the magnitude). */
451   if (diffSignal < 0) {
452     tmp_qCode = ~tmp_qCode;
453     tmp_altQcode = ~tmp_altQcode;
454   }
455   qdata_pt->altQcode = tmp_altQcode;
456   qdata_pt->qCode = tmp_qCode;
457 }
458 
quantiseDifference_HDLL(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)459 void quantiseDifference_HDLL(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
460                              Quantiser_data* qdata_pt) {
461   int32_t absDiffSignal;
462   int32_t absDiffSignalShifted;
463   int32_t index;
464   int32_t dithSquared;
465   int32_t minusLambdaD;
466   int32_t acc;
467   int32_t threshDiff;
468   reg64_t tmp_acc;
469   reg64_t tmp_reg64;
470   int32_t tmp_accL;
471   int32_t tmp_qCode;
472   int32_t tmp_altQcode;
473   uint32_t tmp_round0;
474   int32_t _delta;
475 
476   /* Form the absolute value of the difference signal and maintain a version
477    * that is right-shifted 4 places for delta scaling. */
478   absDiffSignal = -diffSignal;
479   if (diffSignal >= 0) {
480     absDiffSignal = diffSignal;
481   }
482   absDiffSignal = ssat24(absDiffSignal);
483   absDiffSignalShifted = absDiffSignal >> deltaScale;
484 
485   /* Binary search for the quantised code. This search terminates with the
486    * table index of the LARGEST threshold table value for which
487    * absDiffSignalShifted >= (delta * threshold)
488    */
489   index = BsearchLL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
490 
491   /* We actually wanted the SMALLEST magnitude quantised code for which
492    * absDiffSignalShifted < (delta * threshold)
493    * i.e. the code with the next highest magnitude than the one we actually
494    * found. We could add +1 to the code magnitude to do this, but we need to
495    * subtract 1 from the code magnitude to compensate for the "phantom
496    * element" at the base of the quantisation table. These two effects cancel
497    * out, so we leave the value of code alone. However, we need to form code+1
498    * to get the proper index into the both the threshold and dither tables,
499    * since we must skip over the phantom element at the base. */
500   qdata_pt->qCode = index;
501 
502   /* Square the dither and get the value back from the ALU
503    * (saturated/rounded). */
504 
505   tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
506 
507   acc = tmp_acc.s32.h;
508   tmp_round0 = (uint32_t)acc << 8;
509 
510   acc = (acc >> 6) + 1;
511   acc >>= 1;
512   if (tmp_round0 == 0x40000000L) {
513     acc--;
514   }
515 
516   acc = ssat24(acc);
517 
518   dithSquared = acc;
519 
520   /* Form the negative difference of the dither values at index and index-1.
521    * Load the accumulator with this value divided by 2. Ensure saturation is
522    * applied to the difference calculation. */
523   minusLambdaD = qdata_pt->minusLambdaDTable[index];
524 
525   tmp_accL = (1 << 23) - dithSquared;
526   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
527 
528   tmp_round0 = tmp_acc.s32.l << 8;
529 
530   acc = (tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
531   acc++;
532   acc >>= 1;
533   if (tmp_round0 == 0x40000000L) {
534     acc--;
535   }
536 
537   /* Add the threshold table values at index and index-1 to the accumulated
538    * value. */
539 
540   acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
541   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
542   acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
543   //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF
544   // saturation required
545   acc = ssat24(acc);
546 
547   /* Form the threshold table difference at index and index-1. Ensure
548    * saturation is applied to the difference calculation. */
549   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
550 
551   /* Based on the sign of the difference signal, either add or subtract the
552    * threshold table difference from the accumulated value. Recover the final
553    * accumulated value (saturated/rounded) */
554 
555   if (diffSignal < 0) {
556     threshDiff = -threshDiff;
557   }
558   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
559   tmp_reg64.s32.h += acc;
560   acc = tmp_reg64.s32.h;
561 
562   if (tmp_reg64.u32.l >= 0x80000000) {
563     acc++;
564   }
565   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
566 
567   acc = ssat24(acc);
568 
569   if (tmp_round0 == 0x40000000L) {
570     acc--;
571   }
572   _delta = -delta << 8;
573 
574   acc = (int32_t)((uint32_t)acc << 4);
575 
576   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
577    * signal used to determine if dithering alters the quantised code value or
578    * not. */
579   // worst case value for delta is 0x7d400
580 
581   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
582   tmp_reg64.s32.h += absDiffSignal;
583   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
584   acc = tmp_reg64.s32.h + (1 << 2);
585   acc >>= 3;
586   if (tmp_round0 == 0x40000000L) {
587     acc--;
588   }
589 
590   tmp_qCode = qdata_pt->qCode;
591   tmp_altQcode = tmp_qCode - 1;
592   /* Check the sign of the distance penalty. Get the sign from the
593    * full-precision accumulator, as done in the Kalimba code. */
594 
595   if (tmp_reg64.s32.h < 0) {
596     /* The distance is -ve. The optimum code needs decremented by 1 and the
597      * alternative code is 1 greater than this. Get the rounded version of the
598      * -ve distance penalty and negate this (form distance magnitude) before
599      *  writing the value out */
600     tmp_qCode = tmp_altQcode;
601     tmp_altQcode++;
602     acc = -acc;
603   }
604 
605   qdata_pt->distPenalty = acc;
606   /* If the difference signal is negative, bitwise invert the code (restores
607    * sign to the magnitude). */
608   if (diffSignal < 0) {
609     tmp_qCode = ~tmp_qCode;
610     tmp_altQcode = ~tmp_altQcode;
611   }
612   qdata_pt->altQcode = tmp_altQcode;
613   qdata_pt->qCode = tmp_qCode;
614 }
615 
BsearchLH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)616 static int32_t BsearchLH(const int32_t absDiffSignalShifted, const int32_t delta,
617                          const int32_t* dqbitTablePrt) {
618   int32_t qCode;
619   reg64_t tmp_acc;
620   int32_t tmp;
621   int32_t lc_delta = delta << 8;
622 
623   qCode = 0;
624 
625   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[16];
626   tmp_acc.s32.h -= absDiffSignalShifted;
627   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
628   if (tmp <= 0) {
629     qCode = 16;
630   }
631 
632   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
633   tmp_acc.s32.h -= absDiffSignalShifted;
634   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
635   if (tmp <= 0) {
636     qCode += 8;
637   }
638 
639   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
640   tmp_acc.s32.h -= absDiffSignalShifted;
641   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
642   if (tmp <= 0) {
643     qCode += 4;
644   }
645 
646   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
647   tmp_acc.s32.h -= absDiffSignalShifted;
648   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
649   if (tmp <= 0) {
650     qCode += 2;
651   }
652 
653   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
654   tmp_acc.s32.h -= absDiffSignalShifted;
655   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
656   if (tmp <= 0) {
657     qCode++;
658   }
659 
660   return qCode;
661 }
662 
quantiseDifference_HDLH(const int32_t diffSignal,const int32_t ditherVal,const int32_t delta,Quantiser_data * qdata_pt)663 void quantiseDifference_HDLH(const int32_t diffSignal, const int32_t ditherVal, const int32_t delta,
664                              Quantiser_data* qdata_pt) {
665   int32_t absDiffSignal = 0;
666   int32_t absDiffSignalShifted = 0;
667   int32_t index = 0;
668   int32_t dithSquared = 0;
669   int32_t minusLambdaD = 0;
670   int32_t acc = 0;
671   int32_t threshDiff = 0;
672   reg64_t tmp_acc;
673   reg64_t tmp_reg64;
674   int32_t tmp_accL = 0;
675   int32_t tmp_qCode = 0;
676   int32_t tmp_altQcode = 0;
677 
678   uint32_t tmp_round0 = 0;
679   int32_t _delta = 0;
680 
681   /* Form the absolute value of the difference signal and maintain a version
682    * that is right-shifted 4 places for delta scaling. */
683   absDiffSignal = -diffSignal;
684   if (diffSignal >= 0) {
685     absDiffSignal = diffSignal;
686   }
687   absDiffSignal = ssat24(absDiffSignal);
688   absDiffSignalShifted = absDiffSignal >> deltaScale;
689 
690   /* Binary search for the quantised code. This search terminates with the
691    * table index of the LARGEST threshold table value for which
692    * absDiffSignalShifted >= (delta * threshold)
693    */
694 
695   /* first iteration */
696   index = BsearchLH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);
697 
698   /* We actually wanted the SMALLEST magnitude quantised code for which
699    * absDiffSignalShifted < (delta * threshold)
700    * i.e. the code with the next highest magnitude than the one we actually
701    * found. We could add +1 to the code magnitude to do this, but we need to
702    * subtract 1 from the code magnitude to compensate for the "phantom
703    * element" at the base of the quantisation table. These two effects cancel
704    * out, so we leave the value of code alone. However, we need to form code+1
705    * to get the proper index into the both the threshold and dither tables,
706    * since we must skip over the phantom element at the base. */
707   qdata_pt->qCode = index;
708 
709   /* Square the dither and get the value back from the ALU
710    * (saturated/rounded). */
711 
712   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);
713 
714   acc = tmp_reg64.s32.h;
715 
716   tmp_round0 = (uint32_t)acc << 8;
717 
718   acc = (acc >> 6) + 1;
719   acc >>= 1;
720   if (tmp_round0 == 0x40000000L) {
721     acc--;
722   }
723   acc = ssat24(acc);
724 
725   dithSquared = acc;
726 
727   /* Form the negative difference of the dither values at index and index-1.
728    * Load the accumulator with this value divided by 2. Ensure saturation is
729    * applied to the difference calculation. */
730 
731   minusLambdaD = qdata_pt->minusLambdaDTable[index];
732 
733   tmp_accL = (1 << 23) - dithSquared;
734   tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;
735 
736   tmp_round0 = tmp_acc.s32.l << 8;
737 
738   acc = (int32_t)(tmp_acc.u32.l >> 22) | (tmp_acc.s32.h << 10);
739   if (tmp_round0 == 0x40000000L) {
740     acc -= 2;
741   }
742   acc++;
743 
744   /* Add the threshold table values at index and index-1 to the accumulated
745    * value. */
746 
747   acc += qdata_pt->thresholdTablePtr_sl1[index + 1];
748   //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
749   acc += qdata_pt->thresholdTablePtr_sl1[index];
750   acc >>= 1;
751 
752   // saturation required
753   acc = ssat24(acc);
754 
755   /* Form the threshold table difference at index and index-1. Ensure
756    * saturation is applied to the difference calculation. */
757   threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] - qdata_pt->thresholdTablePtr_sl1[index];
758 
759   /* Based on the sign of the difference signal, either add or subtract the
760    * threshold table difference from the accumulated value. Recover the final
761    * accumulated value (saturated/rounded) */
762 
763   if (diffSignal < 0) {
764     threshDiff = -threshDiff;
765   }
766   tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
767 
768   tmp_reg64.s32.h += acc;
769   acc = tmp_reg64.s32.h;
770 
771   if (tmp_reg64.u32.l >= 0x80000000) {
772     acc++;
773   }
774   tmp_round0 = (tmp_reg64.u32.l >> 1) | (tmp_reg64.s32.h << 31);
775 
776   acc = ssat24(acc);
777 
778   if (tmp_round0 == 0x40000000L) {
779     acc--;
780   }
781   _delta = -delta << 8;
782 
783   acc = (int32_t)((uint32_t)acc << 4);
784 
785   /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
786    * signal used to determine if dithering alters the quantised code value or
787    * not. */
788   // worst case value for delta is 0x7d400
789 
790   tmp_reg64.s64 = ((int64_t)acc * (int64_t)_delta);
791   tmp_reg64.s32.h += absDiffSignal;
792   tmp_round0 = (tmp_reg64.u32.l >> 4) | (tmp_reg64.s32.h << 28);
793   acc = tmp_reg64.s32.h + (1 << 2);
794   acc >>= 3;
795   if (tmp_round0 == 0x40000000L) {
796     acc--;
797   }
798 
799   tmp_qCode = qdata_pt->qCode;
800   tmp_altQcode = tmp_qCode - 1;
801   /* Check the sign of the distance penalty. Get the sign from the
802    * full-precision accumulator, as done in the Kalimba code. */
803 
804   if (tmp_reg64.s32.h < 0) {
805     /* The distance is -ve. The optimum code needs decremented by 1 and the
806      * alternative code is 1 greater than this. Get the rounded version of the
807      * -ve distance penalty and negate this (form distance magnitude) before
808      *  writing the value out */
809     tmp_qCode = tmp_altQcode;
810     tmp_altQcode++;
811     acc = -acc;
812   }
813 
814   qdata_pt->distPenalty = acc;
815   /* If the difference signal is negative, bitwise invert the code (restores
816    * sign to the magnitude). */
817   if (diffSignal < 0) {
818     tmp_qCode = ~tmp_qCode;
819     tmp_altQcode = ~tmp_altQcode;
820   }
821   qdata_pt->altQcode = tmp_altQcode;
822   qdata_pt->qCode = tmp_qCode;
823 }
824