xref: /aosp_15_r20/external/libavc/decoder/svc/isvcd_iquant_itrans.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 /**
21 *******************************************************************************
22 * @file
23 *  isvcd_iquant_itrans.c
24 *
25 * @brief
26 *  Contains definitions of functions for H.264 inverse quantization, inverse
27 *  transformation and residual computation
28 *
29 * @author
30 *  Kishore
31 *
32 *  @par List of Functions:
33 *  - isvcd_iquant_itrans_4x4()
34 *  - isvcd_iquant_itrans_8x8()
35 *  - isvcd_iquant_itrans_4x4_dc()
36 *  - isvcd_iquant_itrans_8x8_dc()
37 *  - isvcd_iquant_itrans_chroma_4x4()
38 *  - isvcd_iquant_itrans_chroma_4x4_dc()
39 
40 *
41 * @remarks
42 *
43 *******************************************************************************
44 */
45 
46 /*****************************************************************************/
47 /* File Includes                                                             */
48 /*****************************************************************************/
49 
50 /* User include files */
51 #include "ih264_typedefs.h"
52 #include "ih264_defs.h"
53 #include "ih264_trans_macros.h"
54 #include "ih264_macros.h"
55 #include "ih264_platform_macros.h"
56 #include "ih264_trans_data.h"
57 #include "ih264_size_defs.h"
58 #include "ih264_structs.h"
59 #include "isvcd_iquant_itrans.h"
60 
61 /*****************************************************************************/
62 /*                                                                           */
63 /*  Function Name : isvcd_iquant_itrans_4x4                                   */
64 /*                                                                           */
65 /*  Description   : this function computes the inverse quantized and         */
66 /*                   inverse transformed output                              */
67 /*                                                                           */
68 /*  Inputs        :                                                          */
69 /*  Globals       : none                                                     */
70 /*  Processing    :                                                          */
71 /*                                                                           */
72 /*  Outputs       : none                                                     */
73 /*  Returns       : none                                                     */
74 /*                                                                           */
75 /*  Issues        : none                                                     */
76 /*                                                                           */
77 /*  Revision History:                                                        */
78 /*                                                                           */
79 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
80 /*         25 11 2021   Kishore               creation                       */
81 /*                                                                           */
82 /*****************************************************************************/
/*
 * Inverse quantizes a 4x4 block of coefficients and runs the two passes
 * (rows first, then columns) of the 4x4 inverse transform. Each result is
 * rounded with (x + 32) >> 6, passed through CLIP_RSD and stored in pi2_out.
 *
 * pi2_src        : 16 input coefficients, read row by row
 * pi2_out        : output block; rows are out_strd WORD16 elements apart
 * out_strd       : output stride, in WORD16 elements
 * pu2_iscal_mat  : 16 scaling entries handed to INV_QUANT
 * pu2_weigh_mat  : 16 weighting entries handed to INV_QUANT
 * u4_qp_div_6    : shift parameter handed to INV_QUANT; also selects rnd_fact
 * pi2_tmp        : scratch buffer, 16 WORD16 entries are written and read back
 * iq_start_idx   : when equal to 1, the DC term is taken from pi2_dc_ld_addr[0]
 *                  instead of the dequantized pi2_src[0]
 * pi2_dc_ld_addr : location of the externally supplied DC value
 */
void isvcd_iquant_itrans_4x4(WORD16 *pi2_src, WORD16 *pi2_out, WORD32 out_strd,
                             const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
                             UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD32 iq_start_idx,
                             WORD16 *pi2_dc_ld_addr)
{
    WORD32 row, col, k, ofst;
    WORD32 c0, c1, c2, c3;
    /* Butterfly terms are deliberately kept in WORD16: wider sums are narrowed */
    WORD16 e0, e1, o0, o1;
    WORD16 ai2_res[4];
    WORD16 rnd_fact = 0;

    if(u4_qp_div_6 < 4)
    {
        rnd_fact = 1 << (3 - u4_qp_div_6);
    }

    /* Pass 1: dequantize one row of coefficients and transform it into pi2_tmp */
    for(row = 0, ofst = 0; row < SUB_BLK_WIDTH_4x4; row++, ofst += SUB_BLK_WIDTH_4x4)
    {
        const WORD16 *pi2_coef = pi2_src + ofst;
        const UWORD16 *pu2_scale = pu2_iscal_mat + ofst;
        const UWORD16 *pu2_weight = pu2_weigh_mat + ofst;
        WORD16 *pi2_row = pi2_tmp + ofst;

        c0 = pi2_coef[0];
        INV_QUANT(c0, pu2_scale[0], pu2_weight[0], u4_qp_div_6, rnd_fact, 4);
        if((0 == row) && (1 == iq_start_idx))
        {
            /* The DC value is supplied separately; discard the dequantized one */
            c0 = pi2_dc_ld_addr[0];
        }

        c2 = pi2_coef[2];
        INV_QUANT(c2, pu2_scale[2], pu2_weight[2], u4_qp_div_6, rnd_fact, 4);

        c1 = pi2_coef[1];
        INV_QUANT(c1, pu2_scale[1], pu2_weight[1], u4_qp_div_6, rnd_fact, 4);

        c3 = pi2_coef[3];
        INV_QUANT(c3, pu2_scale[3], pu2_weight[3], u4_qp_div_6, rnd_fact, 4);

        e0 = c0 + c2;
        e1 = c0 - c2;
        o0 = (c1 >> 1) - c3;
        o1 = c1 + (c3 >> 1);

        pi2_row[0] = e0 + o1;
        pi2_row[1] = e1 + o0;
        pi2_row[2] = e1 - o0;
        pi2_row[3] = e0 - o1;
    }

    /* Pass 2: transform each column of pi2_tmp and store the clipped result */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        const WORD16 *pi2_col = pi2_tmp + col;
        WORD16 *pi2_dst = pi2_out + col;

        e0 = pi2_col[0] + pi2_col[8];
        e1 = pi2_col[0] - pi2_col[8];
        o0 = (pi2_col[4] >> 1) - pi2_col[12];
        o1 = pi2_col[4] + (pi2_col[12] >> 1);

        ai2_res[0] = e0 + o1;
        ai2_res[1] = e1 + o0;
        ai2_res[2] = e1 - o0;
        ai2_res[3] = e0 - o1;

        /* Rows are written top to bottom, out_strd elements apart */
        for(k = 0; k < 4; k++)
        {
            pi2_dst[k * out_strd] = CLIP_RSD((ai2_res[k] + 32) >> 6);
        }
    }
}
161 
162 /*****************************************************************************/
163 /*                                                                           */
164 /*  Function Name : isvcd_iquant_itrans_4x4_dc                                */
165 /*                                                                           */
166 /*  Description   : this function computes the inverse quantized and         */
167 /*                   inverse transformed output                              */
168 /*                                                                           */
169 /*  Inputs        :                                                          */
170 /*  Globals       : none                                                     */
171 /*  Processing    :                                                          */
172 /*                                                                           */
173 /*  Outputs       : none                                                     */
174 /*  Returns       : none                                                     */
175 /*                                                                           */
176 /*  Issues        : none                                                     */
177 /*                                                                           */
178 /*  Revision History:                                                        */
179 /*                                                                           */
180 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
181 /*         25 11 2021   Kishore               creation                       */
182 /*                                                                           */
183 /*****************************************************************************/
/*
 * DC-only variant for a 4x4 block: a single value is derived from the DC
 * term, rounded with (x + 32) >> 6, passed through CLIP_RSD and replicated
 * over all 16 output positions.
 *
 * pi2_src        : coefficients; only pi2_src[0] is read (when iq_start_idx is 0)
 * pi2_out        : output block; rows are out_strd WORD16 elements apart
 * out_strd       : output stride, in WORD16 elements
 * pu2_iscal_mat  : scaling matrix; only entry 0 is used
 * pu2_weigh_mat  : weighting matrix; only entry 0 is used
 * u4_qp_div_6    : shift parameter handed to INV_QUANT; also selects rnd_fact
 * pi2_tmp        : unused
 * iq_start_idx   : 0 selects the dequantized pi2_src[0]; any other value
 *                  selects pi2_dc_ld_addr[0] as is
 * pi2_dc_ld_addr : location of the externally supplied DC value
 */
void isvcd_iquant_itrans_4x4_dc(WORD16 *pi2_src, WORD16 *pi2_out, WORD32 out_strd,
                                const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
                                UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                WORD16 *pi2_dc_ld_addr)
{
    WORD32 row, col;
    WORD32 dc;
    WORD16 rsd;
    WORD16 rnd_fact = 0;
    UNUSED(pi2_tmp);

    if(u4_qp_div_6 < 4)
    {
        rnd_fact = 1 << (3 - u4_qp_div_6);
    }

    if(iq_start_idx != 0)
    {
        /* The DC value is supplied separately and is used without dequantization */
        dc = pi2_dc_ld_addr[0];
    }
    else
    {
        dc = pi2_src[0];
        INV_QUANT(dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
    }

    rsd = CLIP_RSD((dc + 32) >> 6);

    /* Every sample of the block receives the same value: four rows of four */
    for(row = 0; row < 4; row++)
    {
        for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
        {
            pi2_out[(row * out_strd) + col] = rsd;
        }
    }
}
220 
221 /*****************************************************************************/
222 /*                                                                           */
223 /*  Function Name : isvcd_iquant_itrans_chroma_4x4                            */
224 /*                                                                           */
225 /*  Description   : this function computes the inverse quantized and         */
226 /*                   inverse transformed output                              */
227 /*                                                                           */
228 /*  Inputs        :                                                          */
229 /*  Globals       : none                                                     */
230 /*  Processing    :                                                          */
231 /*                                                                           */
232 /*  Outputs       : none                                                     */
233 /*  Returns       : none                                                     */
234 /*                                                                           */
235 /*  Issues        : none                                                     */
236 /*                                                                           */
237 /*  Revision History:                                                        */
238 /*                                                                           */
239 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
240 /*         25 11 2021   Kishore               creation                       */
241 /*                                                                           */
242 /*****************************************************************************/
/*
 * Chroma counterpart of the 4x4 inverse quantization and inverse transform.
 * The DC term always comes from pi2_dc_src[0] (it is not passed through
 * INV_QUANT here), and output columns are stored two WORD16 elements apart
 * (interleaved store).
 *
 * pi2_src        : 16 input coefficients; pi2_src[0] is not read
 * pi2_out        : output block; rows are out_strd WORD16 elements apart and
 *                  consecutive columns are 2 elements apart
 * out_strd       : output stride, in WORD16 elements
 * pu2_iscal_mat  : 16 scaling entries handed to INV_QUANT
 * pu2_weigh_mat  : 16 weighting entries handed to INV_QUANT
 * u4_qp_div_6    : shift parameter handed to INV_QUANT; also selects rnd_fact
 * pi2_tmp        : scratch buffer, 16 WORD16 entries are written and read back
 * pi2_dc_src     : location of the DC value
 */
void isvcd_iquant_itrans_chroma_4x4(WORD16 *pi2_src, WORD16 *pi2_out, WORD32 out_strd,
                                    const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
                                    UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
{
    WORD32 row, col, k, ofst;
    WORD32 c0, c1, c2, c3;
    /* Butterfly terms are deliberately kept in WORD16: wider sums are narrowed */
    WORD16 e0, e1, o0, o1;
    WORD16 ai2_res[4];
    WORD16 rnd_fact = 0;

    if(u4_qp_div_6 < 4)
    {
        rnd_fact = 1 << (3 - u4_qp_div_6);
    }

    /* Pass 1: dequantize one row of coefficients and transform it into pi2_tmp */
    for(row = 0, ofst = 0; row < SUB_BLK_WIDTH_4x4; row++, ofst += SUB_BLK_WIDTH_4x4)
    {
        const WORD16 *pi2_coef = pi2_src + ofst;
        const UWORD16 *pu2_scale = pu2_iscal_mat + ofst;
        const UWORD16 *pu2_weight = pu2_weigh_mat + ofst;
        WORD16 *pi2_row = pi2_tmp + ofst;

        if(row != 0)
        {
            c0 = pi2_coef[0];
            INV_QUANT(c0, pu2_scale[0], pu2_weight[0], u4_qp_div_6, rnd_fact, 4);
        }
        else
        {
            /* First row: the DC value is supplied separately and used as is */
            c0 = pi2_dc_src[0];
        }

        c2 = pi2_coef[2];
        INV_QUANT(c2, pu2_scale[2], pu2_weight[2], u4_qp_div_6, rnd_fact, 4);

        c1 = pi2_coef[1];
        INV_QUANT(c1, pu2_scale[1], pu2_weight[1], u4_qp_div_6, rnd_fact, 4);

        c3 = pi2_coef[3];
        INV_QUANT(c3, pu2_scale[3], pu2_weight[3], u4_qp_div_6, rnd_fact, 4);

        e0 = c0 + c2;
        e1 = c0 - c2;
        o0 = (c1 >> 1) - c3;
        o1 = c1 + (c3 >> 1);

        pi2_row[0] = e0 + o1;
        pi2_row[1] = e1 + o0;
        pi2_row[2] = e1 - o0;
        pi2_row[3] = e0 - o1;
    }

    /* Pass 2: transform each column of pi2_tmp and store the clipped result */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        const WORD16 *pi2_col = pi2_tmp + col;
        /* Interleaved store: consecutive columns land two elements apart */
        WORD16 *pi2_dst = pi2_out + (2 * col);

        e0 = pi2_col[0] + pi2_col[8];
        e1 = pi2_col[0] - pi2_col[8];
        o0 = (pi2_col[4] >> 1) - pi2_col[12];
        o1 = pi2_col[4] + (pi2_col[12] >> 1);

        ai2_res[0] = e0 + o1;
        ai2_res[1] = e1 + o0;
        ai2_res[2] = e1 - o0;
        ai2_res[3] = e0 - o1;

        /* Rows are written top to bottom, out_strd elements apart */
        for(k = 0; k < 4; k++)
        {
            pi2_dst[k * out_strd] = CLIP_RSD((ai2_res[k] + 32) >> 6);
        }
    }
}
325 /*****************************************************************************/
326 /*                                                                           */
327 /*  Function Name : isvcd_iquant_itrans_chroma_4x4_dc                         */
328 /*                                                                           */
329 /*  Description   : this function computes the inverse quantized and         */
330 /*                   inverse transformed output                              */
331 /*                                                                           */
332 /*  Inputs        :                                                          */
333 /*  Globals       : none                                                     */
334 /*  Processing    :                                                          */
335 /*                                                                           */
336 /*  Outputs       : none                                                     */
337 /*  Returns       : none                                                     */
338 /*                                                                           */
339 /*  Issues        : none                                                     */
340 /*                                                                           */
341 /*  Revision History:                                                        */
342 /*                                                                           */
343 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
344 /*         25 11 2021   Kishore               creation                       */
345 /*                                                                           */
346 /*****************************************************************************/
347 
/*
 * DC-only chroma variant for a 4x4 block: pi2_dc_src[0] is rounded with
 * (x + 32) >> 6, passed through CLIP_RSD and replicated over the 4x4 output
 * positions, with consecutive columns stored two WORD16 elements apart.
 *
 * pi2_src, pu2_iscal_mat, pu2_weigh_mat, u4_qp_div_6, pi2_tmp : unused
 * pi2_out    : output block; rows are out_strd WORD16 elements apart and
 *              consecutive columns are 2 elements apart
 * out_strd   : output stride, in WORD16 elements
 * pi2_dc_src : location of the DC value
 */
void isvcd_iquant_itrans_chroma_4x4_dc(WORD16 *pi2_src, WORD16 *pi2_out, WORD32 out_strd,
                                       const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
                                       UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
{
    WORD32 row, col;
    WORD32 dc;
    WORD16 rsd;
    UNUSED(pi2_src);
    UNUSED(pu2_iscal_mat);
    UNUSED(pu2_weigh_mat);
    UNUSED(u4_qp_div_6);
    UNUSED(pi2_tmp);

    /* The DC value is supplied separately and used without dequantization */
    dc = pi2_dc_src[0];
    rsd = CLIP_RSD((dc + 32) >> 6);

    /* Four rows of four samples; columns are interleaved (step of 2) */
    for(row = 0; row < 4; row++)
    {
        for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
        {
            pi2_out[(row * out_strd) + (2 * col)] = rsd;
        }
    }
}
383 
/**
*******************************************************************************
*
* @brief
*  This function performs inverse quantization and inverse transform for an
*  8x8 block in which only the DC coefficient is used
*
* @par Description:
*  Inverse quantizes pi2_src[0], rounds it with (x + 32) >> 6, clips it with
*  CLIP_RSD and writes the resulting value to every sample of the 8x8 output
*  block
*
* @param[in] pi2_src
*  Input 8x8 coefficients; only the first (DC) coefficient is read
*
* @param[out] pi2_out
*  Output 8x8 block
*
* @param[in] out_strd
*  Output stride, in WORD16 elements
*
* @param[in] pu2_iscale_mat
*  Pointer to the inverse scaling matrix; only the first entry is read
*
* @param[in] pu2_weigh_mat
*  Pointer to the weighting matrix; only the first entry is read
*
* @param[in] qp_div
*  QP/6
*
* @param[in] pi2_tmp
*  Unused
*
* @param[in] iq_start_idx
*  Unused
*
* @param[in] pi2_dc_ld_addr
*  Unused
*
* @returns  Void
*
* @remarks
*  None
*
*******************************************************************************
*/
/*
 * DC-only variant for an 8x8 block: pi2_src[0] is passed through INV_QUANT,
 * rounded with (x + 32) >> 6, clipped with CLIP_RSD and replicated over all
 * 64 output positions. pi2_tmp, iq_start_idx and pi2_dc_ld_addr are unused.
 */
void isvcd_iquant_itrans_8x8_dc(WORD16 *pi2_src, WORD16 *pi2_out, WORD32 out_strd,
                                const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat,
                                UWORD32 qp_div, WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                WORD16 *pi2_dc_ld_addr)
{
    WORD32 row, col;
    WORD32 dc;
    WORD16 rsd;
    WORD32 rnd_fact = 0;
    UNUSED(pi2_tmp);
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    if(qp_div < 6)
    {
        rnd_fact = 1 << (5 - qp_div);
    }

    /* Only the first coefficient contributes to the output */
    dc = pi2_src[0];
    INV_QUANT(dc, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
    rsd = CLIP_RSD((dc + 32) >> 6);

    /* Every sample of the block receives the same value: eight rows of eight */
    for(row = 0; row < 8; row++)
    {
        for(col = 0; col < SUB_BLK_WIDTH_8x8; col++)
        {
            pi2_out[(row * out_strd) + col] = rsd;
        }
    }
}
496 /*****************************************************************************/
497 /*                                                                           */
498 /*  Function Name : isvcd_iquant_itrans_8x8                                   */
499 /*                                                                           */
500 /*  Description   : this function computes the inverse quantized and         */
501 /*                   inverse transformed output                              */
502 /*                                                                           */
503 /*  Inputs        :                                                          */
504 /*  Globals       : none                                                     */
505 /*  Processing    :                                                          */
506 /*                                                                           */
507 /*  Outputs       : none                                                     */
508 /*  Returns       : none                                                     */
509 /*                                                                           */
510 /*  Issues        : none                                                     */
511 /*                                                                           */
512 /*  Revision History:                                                        */
513 /*                                                                           */
514 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
515 /*         25 11 2021   Kishore               creation                       */
516 /*                                                                           */
517 /*****************************************************************************/
518 
/*
 * First two butterfly stages of the 8-point inverse transform, shared by the
 * row pass and the column pass.
 *
 * Reads eight inputs w0..w7 located `step` elements apart starting at pi2_w
 * and writes z0..z7 to pi2_z:
 *
 *   y0 = w0 + w4                      z0 = y0 + y6
 *   y1 = -w3 + w5 - w7 - (w7 >> 1)    z1 = y1 + (y7 >> 2)
 *   y2 = w0 - w4                      z2 = y2 + y4
 *   y3 = w1 + w7 - w3 - (w3 >> 1)     z3 = y3 + (y5 >> 2)
 *   y4 = (w2 >> 1) - w6               z4 = y2 - y4
 *   y5 = -w1 + w7 + w5 + (w5 >> 1)    z5 = (y3 >> 2) - y5
 *   y6 = w2 + (w6 >> 1)               z6 = y0 - y6
 *   y7 = w3 + w5 + w1 + (w1 >> 1)     z7 = y7 - (y1 >> 2)
 *
 * All y and z terms are held in WORD16, so wider sums are narrowed on store.
 * pi2_z must not overlap the input samples.
 */
static void isvcd_itrans_8x8_stage_z(const WORD16 *pi2_w, WORD32 step, WORD16 *pi2_z)
{
    const WORD16 w0 = pi2_w[0];
    const WORD16 w1 = pi2_w[step];
    const WORD16 w2 = pi2_w[2 * step];
    const WORD16 w3 = pi2_w[3 * step];
    const WORD16 w4 = pi2_w[4 * step];
    const WORD16 w5 = pi2_w[5 * step];
    const WORD16 w6 = pi2_w[6 * step];
    const WORD16 w7 = pi2_w[7 * step];

    /* Operands promote to int, so each sum is exact before it is narrowed */
    const WORD16 y0 = w0 + w4;
    const WORD16 y1 = w5 - w3 - w7 - (w7 >> 1);
    const WORD16 y2 = w0 - w4;
    const WORD16 y3 = w1 + w7 - w3 - (w3 >> 1);
    const WORD16 y4 = (w2 >> 1) - w6;
    const WORD16 y5 = w7 - w1 + w5 + (w5 >> 1);
    const WORD16 y6 = w2 + (w6 >> 1);
    const WORD16 y7 = w3 + w5 + w1 + (w1 >> 1);

    pi2_z[0] = y0 + y6;
    pi2_z[1] = y1 + (y7 >> 2);
    pi2_z[2] = y2 + y4;
    pi2_z[3] = y3 + (y5 >> 2);
    pi2_z[4] = y2 - y4;
    pi2_z[5] = (y3 >> 2) - y5;
    pi2_z[6] = y0 - y6;
    pi2_z[7] = y7 - (y1 >> 2);
}

/*
 * Inverse quantizes an 8x8 block of coefficients and runs the two passes
 * (rows first, then columns) of the 8x8 inverse transform. Each result is
 * rounded with (x + 32) >> 6, passed through CLIP_RSD and stored in pi2_out.
 *
 * pi2_src        : 64 input coefficients
 * pi2_out        : output block; rows are out_strd WORD16 elements apart
 * out_strd       : output stride, in WORD16 elements
 * pu2_iscale_mat : 64 scaling entries handed to INV_QUANT
 * pu2_weigh_mat  : 64 weighting entries handed to INV_QUANT
 * qp_div         : shift parameter handed to INV_QUANT; also selects rnd_fact
 * pi2_tmp        : scratch buffer, 64 WORD16 entries are written and read back
 * iq_start_idx   : unused
 * pi2_dc_ld_addr : unused
 */
void isvcd_iquant_itrans_8x8(WORD16 *pi2_src, WORD16 *pi2_out, WORD32 out_strd,
                             const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat,
                             UWORD32 qp_div, WORD16 *pi2_tmp, WORD32 iq_start_idx,
                             WORD16 *pi2_dc_ld_addr)
{
    WORD32 idx, row, col, k;
    WORD32 coef;
    WORD16 *pi2_row;
    WORD16 ai2_z[8];
    WORD32 ai4_x[8];
    WORD32 rnd_fact = 0;
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    if(qp_div < 6)
    {
        rnd_fact = 1 << (5 - qp_div);
    }

    /* Dequantize all 64 coefficients (the DC term included) into pi2_tmp */
    for(idx = 0; idx < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); idx++)
    {
        coef = pi2_src[idx];
        INV_QUANT(coef, pu2_iscale_mat[idx], pu2_weigh_mat[idx], qp_div, rnd_fact, 6);
        pi2_tmp[idx] = coef;
    }

    /* Pass 1: transform every row of pi2_tmp in place */
    pi2_row = pi2_tmp;
    for(row = 0; row < SUB_BLK_WIDTH_8x8; row++)
    {
        isvcd_itrans_8x8_stage_z(pi2_row, 1, ai2_z);

        /* x[k] = z[2k] + z[7 - 2k] and x[7 - k] = z[2k] - z[7 - 2k], k = 0..3 */
        for(k = 0; k < 4; k++)
        {
            const WORD16 a = ai2_z[2 * k];
            const WORD16 b = ai2_z[7 - (2 * k)];

            pi2_row[k] = a + b;
            pi2_row[7 - k] = a - b;
        }

        pi2_row += SUB_BLK_WIDTH_8x8;
    }

    /* Pass 2: transform every column, then round, clip and store the result */
    for(col = 0; col < SUB_BLK_WIDTH_8x8; col++)
    {
        WORD16 *pi2_dst = pi2_out + col;

        /* Samples of one column sit 8 elements apart in pi2_tmp */
        isvcd_itrans_8x8_stage_z(pi2_tmp + col, 8, ai2_z);

        /* Same final stage as in pass 1, kept at full width until clipping */
        for(k = 0; k < 4; k++)
        {
            const WORD16 a = ai2_z[2 * k];
            const WORD16 b = ai2_z[7 - (2 * k)];

            ai4_x[k] = a + b;
            ai4_x[7 - k] = a - b;
        }

        /* Rows are written top to bottom, out_strd elements apart */
        for(row = 0; row < 8; row++)
        {
            pi2_dst[row * out_strd] = CLIP_RSD((ai4_x[row] + 32) >> 6);
        }
    }
}
715