1 /****************************************************************************** 2 * 3 * Copyright (C) 2014 The Android Open Source Project 4 * Copyright 2003 - 2004 Open Interface North America, Inc. All rights reserved. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at: 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 * 18 ******************************************************************************/ 19 20 /********************************************************************************** 21 $Revision: #1 $ 22 ***********************************************************************************/ 23 24 /** @file 25 @ingroup codec_internal 26 */ 27 28 /**@addgroup codec_internal*/ 29 /**@{*/ 30 31 /* 32 * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima 33 * factorization. The scaling factors are folded into the windowing 34 * constants. 29 adds and 5 16x32 multiplies per 8 samples. 35 */ 36 37 #include "oi_codec_sbc_private.h" 38 39 #define AAN_C4_FIX (759250125)/* S1.30 759250125 0.707107*/ 40 41 #define AAN_C6_FIX (410903207)/* S1.30 410903207 0.382683*/ 42 43 #define AAN_Q0_FIX (581104888)/* S1.30 581104888 0.541196*/ 44 45 #define AAN_Q1_FIX (1402911301)/* S1.30 1402911301 1.306563*/ 46 47 /** Scales x by y bits to the right, adding a rounding factor. 48 */ 49 #ifndef SCALE 50 #define SCALE(x, y) (((x) + (1 <<((y)-1))) >> (y)) 51 #endif 52 53 /** 54 * Default C language implementation of a 32x32->32 multiply. This function may 55 * be replaced by a platform-specific version for speed. 56 * 57 * @param u A signed 32-bit multiplicand 58 * @param v A signed 32-bit multiplier 59 60 * @return A signed 32-bit value corresponding to the 32 most significant bits 61 * of the 64-bit product of u and v. 62 */ 63 INLINE OI_INT32 default_mul_32s_32s_hi(OI_INT32 u, OI_INT32 v); 64 INLINE OI_INT32 default_mul_32s_32s_hi(OI_INT32 u, OI_INT32 v) 65 { 66 OI_UINT32 u0, v0; 67 OI_INT32 u1, v1, w1, w2, t; 68 69 u0 = u & 0xFFFF; u1 = u >> 16; 70 v0 = v & 0xFFFF; v1 = v >> 16; 71 t = u0*v0; 72 t = (u1*v0) + ((OI_UINT32)t >> 16); 73 w1 = t & 0xFFFF; 74 w2 = t >> 16; 75 w1 = (u0*v1) + w1; 76 return (u1*v1) + w2 + (w1 >> 16); 77 } 78 79 #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y) 80 81 82 #ifdef DEBUG_DCT 83 PRIVATE void float_dct2_8(float * RESTRICT out, OI_INT32 const *RESTRICT in) 84 { 85 #define FIX(x,bits) (((int)floor(0.5f+((x)*((float)(1<<bits)))))/((float)(1<<bits))) 86 #define FLOAT_BUTTERFLY(x,y) x += y; y = x - (y*2); OI_ASSERT(VALID_INT32(x)); OI_ASSERT(VALID_INT32(y)); 87 #define FLOAT_MULT_DCT(K, sample) (FIX(K,20) * sample) 88 #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y)))) 89 90 double L00,L01,L02,L03,L04,L05,L06,L07; 91 double L25; 92 93 double in0,in1,in2,in3; 94 double in4,in5,in6,in7; 95 96 in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in0)); 97 in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in1)); 98 in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in2)); 99 in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in3)); 100 in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in4)); 101 in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in5)); 102 in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in6)); 103 in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in7)); 104 105 L00 = (in0 + in7); OI_ASSERT(VALID_INT32(L00)); 106 L01 = (in1 + in6); OI_ASSERT(VALID_INT32(L01)); 107 L02 = (in2 + in5); OI_ASSERT(VALID_INT32(L02)); 108 L03 = (in3 + in4); OI_ASSERT(VALID_INT32(L03)); 109 110 L04 = (in3 - in4); OI_ASSERT(VALID_INT32(L04)); 111 L05 = (in2 - in5); OI_ASSERT(VALID_INT32(L05)); 112 L06 = (in1 - in6); OI_ASSERT(VALID_INT32(L06)); 113 L07 = (in0 - in7); OI_ASSERT(VALID_INT32(L07)); 114 115 FLOAT_BUTTERFLY(L00, L03); 116 FLOAT_BUTTERFLY(L01, L02); 117 118 L02 += L03; OI_ASSERT(VALID_INT32(L02)); 119 120 L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); OI_ASSERT(VALID_INT32(L02)); 121 122 FLOAT_BUTTERFLY(L00, L01); 123 124 out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); OI_ASSERT(VALID_INT16(out[0])); 125 out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); OI_ASSERT(VALID_INT16(out[4])); 126 127 FLOAT_BUTTERFLY(L03, L02); 128 out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); OI_ASSERT(VALID_INT16(out[6])); 129 out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); OI_ASSERT(VALID_INT16(out[2])); 130 131 L04 += L05; OI_ASSERT(VALID_INT32(L04)); 132 L05 += L06; OI_ASSERT(VALID_INT32(L05)); 133 L06 += L07; OI_ASSERT(VALID_INT32(L06)); 134 135 L04/=2; 136 L05/=2; 137 L06/=2; 138 L07/=2; 139 140 L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); OI_ASSERT(VALID_INT32(L05)); 141 142 L25 = L06 - L04; OI_ASSERT(VALID_INT32(L25)); 143 L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); OI_ASSERT(VALID_INT32(L25)); 144 145 L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); OI_ASSERT(VALID_INT32(L04)); 146 L04 -= L25; OI_ASSERT(VALID_INT32(L04)); 147 148 L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); OI_ASSERT(VALID_INT32(L06)); 149 L06 -= L25; OI_ASSERT(VALID_INT32(L25)); 150 151 FLOAT_BUTTERFLY(L07, L05); 152 153 FLOAT_BUTTERFLY(L05, L04); 154 out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3-1)); OI_ASSERT(VALID_INT16(out[3])); 155 out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5-1)); OI_ASSERT(VALID_INT16(out[5])); 156 157 FLOAT_BUTTERFLY(L07, L06); 158 out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7-1)); OI_ASSERT(VALID_INT16(out[7])); 159 out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1-1)); OI_ASSERT(VALID_INT16(out[1])); 160 } 161 #undef BUTTERFLY 162 #endif 163 164 165 /* 166 * This function calculates the AAN DCT. Its inputs are in S16.15 format, as 167 * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38 168 * (1244918057 integer). The function it computes is an approximation to the array defined 169 * by: 170 * 171 * diag(aan_s) * AAN= C2 172 * 173 * or 174 * 175 * AAN = diag(1/aan_s) * C2 176 * 177 * where C2 is as it is defined in the comment at the head of this file, and 178 * 179 * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1; 180 * 181 * aan_s[i] = [ 1.000 0.510 0.541 0.601 0.707 0.900 1.307 2.563 ] 182 * 183 * The output ranges are shown as follows: 184 * 185 * Let Y[0..7] = AAN * X[0..7] 186 * 187 * Without loss of generality, assume the input vector X consists of elements 188 * between -1 and 1. The maximum possible value of a given output element occurs 189 * with some particular combination of input vector elements each of which is -1 190 * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y is 191 * maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a 192 * positive contribution to the sum. Equivalently, one may simply sum 193 * abs(AAN)[t,i] over t to get the maximum possible value of Y[i]. 194 * 195 * This yields approximately [8.00 10.05 9.66 8.52 8.00 5.70 4.00 2.00] 196 * 197 * Given the maximum magnitude sensible input value of +/-37992, this yields the 198 * following vector of maximum output magnitudes: 199 * 200 * [ 303936 381820 367003 323692 303936 216555 151968 75984 ] 201 * 202 * Ultimately, these values must fit into 16 bit signed integers, so they must 203 * be scaled. A non-uniform scaling helps maximize the kept precision. The 204 * relative number of extra bits of precision maintainable with respect to the 205 * largest value is given here: 206 * 207 * [ 0 0 0 0 0 0 1 2 ] 208 * 209 */ 210 PRIVATE void dct2_8(SBC_BUFFER_T * RESTRICT out, OI_INT32 const *RESTRICT in); 211 PRIVATE void dct2_8(SBC_BUFFER_T * RESTRICT out, OI_INT32 const *RESTRICT in) 212 { 213 #define BUTTERFLY(x,y) x += y; y = x - (y<<1); 214 #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K,x)<<2) 215 216 OI_INT32 L00,L01,L02,L03,L04,L05,L06,L07; 217 OI_INT32 L25; 218 219 OI_INT32 in0,in1,in2,in3; 220 OI_INT32 in4,in5,in6,in7; 221 222 #if DCTII_8_SHIFT_IN != 0 223 in0 = SCALE(in[0], DCTII_8_SHIFT_IN); 224 in1 = SCALE(in[1], DCTII_8_SHIFT_IN); 225 in2 = SCALE(in[2], DCTII_8_SHIFT_IN); 226 in3 = SCALE(in[3], DCTII_8_SHIFT_IN); 227 in4 = SCALE(in[4], DCTII_8_SHIFT_IN); 228 in5 = SCALE(in[5], DCTII_8_SHIFT_IN); 229 in6 = SCALE(in[6], DCTII_8_SHIFT_IN); 230 in7 = SCALE(in[7], DCTII_8_SHIFT_IN); 231 #else 232 in0 = in[0]; 233 in1 = in[1]; 234 in2 = in[2]; 235 in3 = in[3]; 236 in4 = in[4]; 237 in5 = in[5]; 238 in6 = in[6]; 239 in7 = in[7]; 240 #endif 241 242 L00 = in0 + in7; 243 L01 = in1 + in6; 244 L02 = in2 + in5; 245 L03 = in3 + in4; 246 247 L04 = in3 - in4; 248 L05 = in2 - in5; 249 L06 = in1 - in6; 250 L07 = in0 - in7; 251 252 BUTTERFLY(L00, L03); 253 BUTTERFLY(L01, L02); 254 255 L02 += L03; 256 257 L02 = FIX_MULT_DCT(AAN_C4_FIX, L02); 258 259 BUTTERFLY(L00, L01); 260 261 out[0] = (OI_INT16)SCALE(L00, DCTII_8_SHIFT_0); 262 out[4] = (OI_INT16)SCALE(L01, DCTII_8_SHIFT_4); 263 264 BUTTERFLY(L03, L02); 265 out[6] = (OI_INT16)SCALE(L02, DCTII_8_SHIFT_6); 266 out[2] = (OI_INT16)SCALE(L03, DCTII_8_SHIFT_2); 267 268 L04 += L05; 269 L05 += L06; 270 L06 += L07; 271 272 L04/=2; 273 L05/=2; 274 L06/=2; 275 L07/=2; 276 277 L05 = FIX_MULT_DCT(AAN_C4_FIX, L05); 278 279 L25 = L06 - L04; 280 L25 = FIX_MULT_DCT(AAN_C6_FIX, L25); 281 282 L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04); 283 L04 -= L25; 284 285 L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06); 286 L06 -= L25; 287 288 BUTTERFLY(L07, L05); 289 290 BUTTERFLY(L05, L04); 291 out[3] = (OI_INT16)SCALE(L04, DCTII_8_SHIFT_3-1); 292 out[5] = (OI_INT16)SCALE(L05, DCTII_8_SHIFT_5-1); 293 294 BUTTERFLY(L07, L06); 295 out[7] = (OI_INT16)SCALE(L06, DCTII_8_SHIFT_7-1); 296 out[1] = (OI_INT16)SCALE(L07, DCTII_8_SHIFT_1-1); 297 #undef BUTTERFLY 298 299 #ifdef DEBUG_DCT 300 { 301 float float_out[8]; 302 float_dct2_8(float_out, in); 303 } 304 #endif 305 } 306 307 /**@}*/ 308