1 /******************************************************************************
2  *
3  *  Copyright 2014 The Android Open Source Project
4  *  Copyright 2003 - 2004 Open Interface North America, Inc. All rights
5  *                        reserved.
6  *
7  *  Licensed under the Apache License, Version 2.0 (the "License");
8  *  you may not use this file except in compliance with the License.
9  *  You may obtain a copy of the License at:
10  *
11  *  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  *  Unless required by applicable law or agreed to in writing, software
14  *  distributed under the License is distributed on an "AS IS" BASIS,
15  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  *  See the License for the specific language governing permissions and
17  *  limitations under the License.
18  *
19  ******************************************************************************/
20 
21 /*******************************************************************************
22   $Revision: #1 $
23  ******************************************************************************/
24 
25 /** @file
26 @ingroup codec_internal
27 */
28 
29 /**@addgroup codec_internal*/
30 /**@{*/
31 
32 /*
33  * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
34  * factorization. The scaling factors are folded into the windowing
35  * constants. 29 adds and 5 16x32 multiplies per 8 samples.
36  */
37 
38 #include "oi_codec_sbc_private.h"
39 
40 #define AAN_C4_FIX (759250125) /* S1.30  759250125   0.707107*/
41 
42 #define AAN_C6_FIX (410903207) /* S1.30  410903207   0.382683*/
43 
44 #define AAN_Q0_FIX (581104888) /* S1.30  581104888   0.541196*/
45 
46 #define AAN_Q1_FIX (1402911301) /* S1.30 1402911301   1.306563*/
47 
48 /** Scales x by y bits to the right, adding a rounding factor.
49  */
50 #ifndef SCALE
51 #define SCALE(x, y) (((x) + (1 << ((y) - 1))) >> (y))
52 #endif
53 
54 /**
55  * Default C language implementation of a 32x32->32 multiply. This function may
56  * be replaced by a platform-specific version for speed.
57  *
58  * @param u A signed 32-bit multiplicand
59  * @param v A signed 32-bit multiplier
60 
61  * @return  A signed 32-bit value corresponding to the 32 most significant bits
62  * of the 64-bit product of u and v.
63  */
default_mul_32s_32s_hi(int32_t u,int32_t v)64 INLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) {
65   uint32_t u0, v0;
66   int32_t u1, v1, w1, w2, t;
67 
68   u0 = u & 0xFFFF;
69   u1 = u >> 16;
70   v0 = v & 0xFFFF;
71   v1 = v >> 16;
72   t = u0 * v0;
73   t = u1 * v0 + ((uint32_t)t >> 16);
74   w1 = t & 0xFFFF;
75   w2 = t >> 16;
76   w1 = u0 * v1 + w1;
77   return u1 * v1 + w2 + (w1 >> 16);
78 }
79 
80 #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
81 
82 #ifdef DEBUG_DCT
float_dct2_8(float * RESTRICT out,int32_t const * RESTRICT in)83 PRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) {
84 #define FIX(x, bits) (((int)floor(0.5f + ((x) * ((float)(1 << bits))))) / ((float)(1 << bits)))
85 #define FLOAT_BUTTERFLY(x, y) \
86   x += y;                     \
87   y = x - (y * 2);            \
88   OI_ASSERT(VALID_INT32(x));  \
89   OI_ASSERT(VALID_INT32(y));
90 #define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * sample)
91 #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))
92 
93   double L00, L01, L02, L03, L04, L05, L06, L07;
94   double L25;
95 
96   double in0, in1, in2, in3;
97   double in4, in5, in6, in7;
98 
99   in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN);
100   OI_ASSERT(VALID_INT32(in0));
101   in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN);
102   OI_ASSERT(VALID_INT32(in1));
103   in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN);
104   OI_ASSERT(VALID_INT32(in2));
105   in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN);
106   OI_ASSERT(VALID_INT32(in3));
107   in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN);
108   OI_ASSERT(VALID_INT32(in4));
109   in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN);
110   OI_ASSERT(VALID_INT32(in5));
111   in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN);
112   OI_ASSERT(VALID_INT32(in6));
113   in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN);
114   OI_ASSERT(VALID_INT32(in7));
115 
116   L00 = (in0 + in7);
117   OI_ASSERT(VALID_INT32(L00));
118   L01 = (in1 + in6);
119   OI_ASSERT(VALID_INT32(L01));
120   L02 = (in2 + in5);
121   OI_ASSERT(VALID_INT32(L02));
122   L03 = (in3 + in4);
123   OI_ASSERT(VALID_INT32(L03));
124 
125   L04 = (in3 - in4);
126   OI_ASSERT(VALID_INT32(L04));
127   L05 = (in2 - in5);
128   OI_ASSERT(VALID_INT32(L05));
129   L06 = (in1 - in6);
130   OI_ASSERT(VALID_INT32(L06));
131   L07 = (in0 - in7);
132   OI_ASSERT(VALID_INT32(L07));
133 
134   FLOAT_BUTTERFLY(L00, L03);
135   FLOAT_BUTTERFLY(L01, L02);
136 
137   L02 += L03;
138   OI_ASSERT(VALID_INT32(L02));
139 
140   L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02);
141   OI_ASSERT(VALID_INT32(L02));
142 
143   FLOAT_BUTTERFLY(L00, L01);
144 
145   out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0);
146   OI_ASSERT(VALID_INT16(out[0]));
147   out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4);
148   OI_ASSERT(VALID_INT16(out[4]));
149 
150   FLOAT_BUTTERFLY(L03, L02);
151   out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6);
152   OI_ASSERT(VALID_INT16(out[6]));
153   out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2);
154   OI_ASSERT(VALID_INT16(out[2]));
155 
156   L04 += L05;
157   OI_ASSERT(VALID_INT32(L04));
158   L05 += L06;
159   OI_ASSERT(VALID_INT32(L05));
160   L06 += L07;
161   OI_ASSERT(VALID_INT32(L06));
162 
163   L04 /= 2;
164   L05 /= 2;
165   L06 /= 2;
166   L07 /= 2;
167 
168   L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05);
169   OI_ASSERT(VALID_INT32(L05));
170 
171   L25 = L06 - L04;
172   OI_ASSERT(VALID_INT32(L25));
173   L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25);
174   OI_ASSERT(VALID_INT32(L25));
175 
176   L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04);
177   OI_ASSERT(VALID_INT32(L04));
178   L04 -= L25;
179   OI_ASSERT(VALID_INT32(L04));
180 
181   L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06);
182   OI_ASSERT(VALID_INT32(L06));
183   L06 -= L25;
184   OI_ASSERT(VALID_INT32(L25));
185 
186   FLOAT_BUTTERFLY(L07, L05);
187 
188   FLOAT_BUTTERFLY(L05, L04);
189   out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1));
190   OI_ASSERT(VALID_INT16(out[3]));
191   out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1));
192   OI_ASSERT(VALID_INT16(out[5]));
193 
194   FLOAT_BUTTERFLY(L07, L06);
195   out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1));
196   OI_ASSERT(VALID_INT16(out[7]));
197   out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1));
198   OI_ASSERT(VALID_INT16(out[1]));
199 }
200 #undef BUTTERFLY
201 #endif
202 
203 /*
204  * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
205  * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
206  * (1244918057 integer). The function it computes is an approximation to the
207  * array defined by:
208  *
209  * diag(aan_s) * AAN= C2
210  *
211  *   or
212  *
213  * AAN = diag(1/aan_s) * C2
214  *
215  * where C2 is as it is defined in the comment at the head of this file, and
216  *
217  * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1;
218  *
219  * aan_s[i] = [ 1.000  0.510  0.541  0.601  0.707  0.900  1.307  2.563 ]
220  *
221  * The output ranges are shown as follows:
222  *
223  * Let Y[0..7] = AAN * X[0..7]
224  *
225  * Without loss of generality, assume the input vector X consists of elements
226  * between -1 and 1. The maximum possible value of a given output element occurs
227  * with some particular combination of input vector elements each of which is -1
228  * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y
229  * is maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a
230  * positive contribution to the sum. Equivalently, one may simply sum
231  * abs(AAN)[t,i] over t to get the maximum possible value of Y[i].
232  *
233  * This yields approximately:
234  *  [8.00  10.05   9.66   8.52   8.00   5.70   4.00   2.00]
235  *
236  * Given the maximum magnitude sensible input value of +/-37992, this yields the
237  * following vector of maximum output magnitudes:
238  *
239  * [ 303936  381820  367003  323692  303936  216555  151968   75984 ]
240  *
241  * Ultimately, these values must fit into 16 bit signed integers, so they must
242  * be scaled. A non-uniform scaling helps maximize the kept precision. The
243  * relative number of extra bits of precision maintainable with respect to the
244  * largest value is given here:
245  *
246  * [ 0  0  0  0  0  0  1  2 ]
247  *
248  */
dct2_8(SBC_BUFFER_T * RESTRICT out,int32_t const * RESTRICT in)249 PRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) {
250 #define BUTTERFLY(x, y) \
251   x += (y);             \
252   (y) = (x) - ((y) << 1);
253 #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2)
254 
255   int32_t L00, L01, L02, L03, L04, L05, L06, L07;
256   int32_t L25;
257 
258   int32_t in0, in1, in2, in3;
259   int32_t in4, in5, in6, in7;
260 
261 #if DCTII_8_SHIFT_IN != 0
262   in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
263   in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
264   in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
265   in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
266   in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
267   in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
268   in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
269   in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
270 #else
271   in0 = in[0];
272   in1 = in[1];
273   in2 = in[2];
274   in3 = in[3];
275   in4 = in[4];
276   in5 = in[5];
277   in6 = in[6];
278   in7 = in[7];
279 #endif
280 
281   L00 = in0 + in7;
282   L01 = in1 + in6;
283   L02 = in2 + in5;
284   L03 = in3 + in4;
285 
286   L04 = in3 - in4;
287   L05 = in2 - in5;
288   L06 = in1 - in6;
289   L07 = in0 - in7;
290 
291   BUTTERFLY(L00, L03);
292   BUTTERFLY(L01, L02);
293 
294   L02 += L03;
295 
296   L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
297 
298   BUTTERFLY(L00, L01);
299 
300   out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0);
301   out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4);
302 
303   BUTTERFLY(L03, L02);
304   out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6);
305   out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2);
306 
307   L04 += L05;
308   L05 += L06;
309   L06 += L07;
310 
311   L04 /= 2;
312   L05 /= 2;
313   L06 /= 2;
314   L07 /= 2;
315 
316   L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
317 
318   L25 = L06 - L04;
319   L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
320 
321   L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
322   L04 -= L25;
323 
324   L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
325   L06 -= L25;
326 
327   BUTTERFLY(L07, L05);
328 
329   BUTTERFLY(L05, L04);
330   out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1);
331   out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1);
332 
333   BUTTERFLY(L07, L06);
334   out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1);
335   out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1);
336 #undef BUTTERFLY
337 
338 #ifdef DEBUG_DCT
339   {
340     float float_out[8];
341     float_dct2_8(float_out, in);
342   }
343 #endif
344 }
345 
346 /**@}*/
347