xref: /aosp_15_r20/external/libavc/common/ih264_iquant_itrans_recon.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264_iquant_itrans_recon.c
25 *
26 * @brief
27 *  Contains definition of functions for h264 inverse quantization,
28 *  inverse transformation and recon
29 *
30 * @author
31 *  ittiam
32 *
33 * @par List of Functions:
34 *  - ih264_iquant_itrans_recon_4x4
35 *  - ih264_iquant_itrans_recon_8x8
36 *  - ih264_iquant_itrans_recon_4x4_dc
37 *  - ih264_iquant_itrans_recon_8x8_dc
38 *  - ih264_iquant_itrans_recon_chroma_4x4
39 *  - ih264_iquant_itrans_recon_chroma_4x4_dc
40 *
41 * @remarks
42 *
43 *******************************************************************************
44 */
45 
46 /*****************************************************************************/
47 /* File Includes                                                             */
48 /*****************************************************************************/
49 
50 /* User Include Files */
51 #include "ih264_typedefs.h"
52 #include "ih264_defs.h"
53 #include "ih264_macros.h"
54 #include "ih264_size_defs.h"
55 #include "ih264_trans_macros.h"
56 #include "ih264_trans_data.h"
57 #include "ih264_structs.h"
58 #include "ih264_trans_quant_itrans_iquant.h"
59 #include "ih264_platform_macros.h"
60 
61 /*****************************************************************************/
62 /*  Function definitions                                                     */
63 /*****************************************************************************/
64 
65 /**
66 ********************************************************************************
67 *
68 * @brief This function reconstructs a 4x4 sub block from quantized residue and
69 *  prediction buffer
70 *
71 * @par Description:
72 *  The quantized residue is first inverse quantized, then inverse transformed.
73 *  This inverse transformed content is added to the prediction buffer to recon-
74 *  struct the end output
75 *
76 * @param[in] pi2_src
77 *  quantized 4x4 block
78 *
79 * @param[in] pu1_pred
80 *  prediction 4x4 block
81 *
82 * @param[out] pu1_out
83 *  reconstructed 4x4 block
84 *
85 * @param[in] pred_strd
86 *  Prediction buffer stride
87 *
88 * @param[in] out_strd
89 *  recon buffer Stride
90 *
91 * @param[in] pu2_iscal_mat
92 *  pointer to inverse scaling matrix
93 *
94 * @param[in] pu2_weigh_mat
95 *  pointer to weight matrix
96 *
97 * @param[in] u4_qp_div_6
98 *  Floor (qp/6)
99 *
100 * @param[in] pi2_tmp
101 *  temporary buffer of size 1*16
102 *
103 * @param[in] iq_start_idx
104 *  Differentiates b/w intra or inter
105 *
106 * @param[in] pi2_dc_ld_addr
107 *  Address to load DC value of the 4x4 blk
108 *
109 * @returns none
110 *
111 * @remarks none
112 *
113 *******************************************************************************
114 */
ih264_iquant_itrans_recon_4x4(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)115 void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
116                                    UWORD8 *pu1_pred,
117                                    UWORD8 *pu1_out,
118                                    WORD32 pred_strd,
119                                    WORD32 out_strd,
120                                    const UWORD16 *pu2_iscal_mat,
121                                    const UWORD16 *pu2_weigh_mat,
122                                    UWORD32 u4_qp_div_6,
123                                    WORD16 *pi2_tmp,
124                                    WORD32 iq_start_idx,
125                                    WORD16 *pi2_dc_ld_addr)
126 {
127     WORD16 *pi2_src_ptr = pi2_src;
128     WORD16 *pi2_tmp_ptr = pi2_tmp;
129     UWORD8 *pu1_pred_ptr = pu1_pred;
130     UWORD8 *pu1_out_ptr = pu1_out;
131     WORD16 x0, x1, x2, x3, i;
132     WORD32 q0, q1, q2, q3;
133     WORD16 i_macro;
134     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
135 
136     /* inverse quant */
137     /* horizontal inverse transform */
138     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
139     {
140         q0 = pi2_src_ptr[0];
141         INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
142         /* Restoring dc value for intra case */
143         if (i==0 && iq_start_idx == 1)
144         {
145             q0 = pi2_dc_ld_addr[0];
146         }
147 
148         q2 = pi2_src_ptr[2];
149         INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
150 
151         x0 = q0 + q2;
152         x1 = q0 - q2;
153 
154         q1 = pi2_src_ptr[1];
155         INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
156 
157         q3 = pi2_src_ptr[3];
158         INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
159 
160         x2 = (q1 >> 1) - q3;
161         x3 = q1 + (q3 >> 1);
162 
163         pi2_tmp_ptr[0] = x0 + x3;
164         pi2_tmp_ptr[1] = x1 + x2;
165         pi2_tmp_ptr[2] = x1 - x2;
166         pi2_tmp_ptr[3] = x0 - x3;
167 
168         pi2_src_ptr += SUB_BLK_WIDTH_4x4;
169         pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
170         pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
171         pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
172     }
173 
174     /* vertical inverse transform */
175     pi2_tmp_ptr = pi2_tmp;
176     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
177     {
178         pu1_pred_ptr = pu1_pred;
179         pu1_out = pu1_out_ptr;
180 
181         x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
182         x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
183         x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
184         x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
185 
186         /* inverse prediction */
187         i_macro = x0 + x3;
188         i_macro = ((i_macro + 32) >> 6);
189         i_macro += *pu1_pred_ptr;
190         *pu1_out = CLIP_U8(i_macro);
191         pu1_pred_ptr += pred_strd;
192         pu1_out += out_strd;
193 
194         i_macro = x1 + x2;
195         i_macro = ((i_macro + 32) >> 6);
196         i_macro += *pu1_pred_ptr;
197         *pu1_out = CLIP_U8(i_macro);
198         pu1_pred_ptr += pred_strd;
199         pu1_out += out_strd;
200 
201         i_macro = x1 - x2;
202         i_macro = ((i_macro + 32) >> 6);
203         i_macro += *pu1_pred_ptr;
204         *pu1_out = CLIP_U8(i_macro);
205         pu1_pred_ptr += pred_strd;
206         pu1_out += out_strd;
207 
208         i_macro = x0 - x3;
209         i_macro = ((i_macro + 32) >> 6);
210         i_macro += *pu1_pred_ptr;
211         *pu1_out = CLIP_U8(i_macro);
212 
213         pi2_tmp_ptr++;
214         pu1_out_ptr++;
215         pu1_pred++;
216     }
217 }
218 
219 /**
220 ********************************************************************************
221 *
222 * @brief This function reconstructs a 4x4 sub block from quantized residue and
223 *  prediction buffer, if only dc value is present for residue
224 *
225 * @par Description:
226 *  The quantized residue is first inverse quantized, then inverse transformed.
227 *  This inverse transformed content is added to the prediction buffer to recon-
228 *  struct the end output
229 *
230 * @param[in] pi2_src
231 *  quantized 4x4 block
232 *
233 * @param[in] pu1_pred
234 *  prediction 4x4 block
235 *
236 * @param[out] pu1_out
237 *  reconstructed 4x4 block
238 *
239 * @param[in] pred_strd
240 *  Prediction buffer stride
241 *
242 * @param[in] out_strd
243 *  recon buffer Stride
244 *
245 * @param[in] pu2_iscal_mat
246 *  pointer to inverse scaling matrix
247 *
248 * @param[in] pu2_weigh_mat
249 *  pointer to weight matrix
250 *
251 * @param[in] u4_qp_div_6
252 *  Floor (qp/6)
253 *
254 * @param[in] pi2_tmp
255 *  temporary buffer of size 1*16
256 *
257 * @param[in] iq_start_idx
258 *  Differentiates b/w intra or inter
259 *
260 * @param[in] pi2_dc_ld_addr
261 *  Address to load DC value of the 4x4 blk
262 *
263 * @returns none
264 *
265 * @remarks none
266 *
267 *******************************************************************************
268 */
ih264_iquant_itrans_recon_4x4_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)269 void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
270                                       UWORD8 *pu1_pred,
271                                       UWORD8 *pu1_out,
272                                       WORD32 pred_strd,
273                                       WORD32 out_strd,
274                                       const UWORD16 *pu2_iscal_mat,
275                                       const UWORD16 *pu2_weigh_mat,
276                                       UWORD32 u4_qp_div_6,
277                                       WORD16 *pi2_tmp,
278                                       WORD32 iq_start_idx,
279                                       WORD16 *pi2_dc_ld_addr)
280 {
281     UWORD8 *pu1_pred_ptr = pu1_pred;
282     UWORD8 *pu1_out_ptr = pu1_out;
283     WORD32 q0;
284     WORD16 x, i_macro, i;
285     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
286 
287     UNUSED(pi2_tmp);
288     if(iq_start_idx == 0)
289     {
290         q0 = pi2_src[0];
291         INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
292     }
293     else
294     {
295         q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case3
296     }
297     i_macro = ((q0 + 32) >> 6);
298     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
299     {
300         pu1_pred_ptr = pu1_pred;
301         pu1_out = pu1_out_ptr;
302 
303         /* inverse prediction */
304         x = i_macro + *pu1_pred_ptr;
305         *pu1_out = CLIP_U8(x);
306         pu1_pred_ptr += pred_strd;
307         pu1_out += out_strd;
308 
309         x = i_macro + *pu1_pred_ptr;
310         *pu1_out = CLIP_U8(x);
311         pu1_pred_ptr += pred_strd;
312         pu1_out += out_strd;
313 
314         x = i_macro + *pu1_pred_ptr;
315         *pu1_out = CLIP_U8(x);
316         pu1_pred_ptr += pred_strd;
317         pu1_out += out_strd;
318 
319         x = i_macro + *pu1_pred_ptr;
320         *pu1_out = CLIP_U8(x);
321 
322         pu1_out_ptr++;
323         pu1_pred++;
324     }
325 }
326 
327 /**
328 ********************************************************************************
329 *
330 * @brief This function reconstructs a 8x8 sub block from quantized residue and
331 *  prediction buffer
332 *
333 * @par Description:
334 *  The quantized residue is first inverse quantized, then inverse transformed.
335 *  This inverse transformed content is added to the prediction buffer to recon-
336 *  struct the end output
337 *
338 * @param[in] pi2_src
339 *  quantized 4x4 block
340 *
341 * @param[in] pu1_pred
342 *  prediction 4x4 block
343 *
344 * @param[out] pu1_out
345 *  reconstructed 4x4 block
346 *
347 * @param[in] pred_strd
348 *  Prediction buffer stride
349 *
350 * @param[in] out_strd
351 *  recon buffer Stride
352 *
353 * @param[in] pu2_iscal_mat
354 *  pointer to inverse scaling matrix
355 *
356 * @param[in] pu2_weigh_mat
357 *  pointer to weight matrix
358 *
359 * @param[in] u4_qp_div_6
360 *  Floor (qp/6)
361 *
362 * @param[in] pi2_tmp
363 *  temporary buffer of size 1*16. we dont need a bigger block since we reuse
364 *  the tmp for each block
365 *
366 * @param[in] iq_start_idx
367 *  UNUSED
368 *
369 * @param[in] pi2_dc_ld_addr
370 *  UNUSED
371 *
372 * @returns none
373 *
374 * @remarks none
375 *
376 *******************************************************************************
377 */
ih264_iquant_itrans_recon_8x8(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)378 void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
379                                    UWORD8 *pu1_pred,
380                                    UWORD8 *pu1_out,
381                                    WORD32 pred_strd,
382                                    WORD32 out_strd,
383                                    const UWORD16 *pu2_iscale_mat,
384                                    const UWORD16 *pu2_weigh_mat,
385                                    UWORD32 qp_div,
386                                    WORD16 *pi2_tmp,
387                                    WORD32 iq_start_idx,
388                                    WORD16 *pi2_dc_ld_addr)
389 {
390     WORD32 i;
391     WORD16 *pi2_tmp_ptr = pi2_tmp;
392     UWORD8 *pu1_pred_ptr = pu1_pred;
393     UWORD8 *pu1_out_ptr = pu1_out;
394     WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
395     WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
396     WORD16 i_macro;
397     WORD32 q;
398     WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
399 
400     UNUSED(iq_start_idx);
401     UNUSED(pi2_dc_ld_addr);
402     /*************************************************************/
403     /* De quantization of coefficients. Will be replaced by SIMD */
404     /* operations on platform. Note : DC coeff is not scaled     */
405     /*************************************************************/
406     for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
407     {
408         q = pi2_src[i];
409         INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
410         pi2_tmp_ptr[i] = q;
411     }
412     /* Perform Inverse transform */
413     /*--------------------------------------------------------------------*/
414     /* IDCT [ Horizontal transformation ]                                 */
415     /*--------------------------------------------------------------------*/
416     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
417     {
418         /*------------------------------------------------------------------*/
419         /* y0 = w0 + w4                                                     */
420         /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
421         /* y2 = w0 - w4                                                     */
422         /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
423         /* y4 = (w2 >> 1) - w6                                              */
424         /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
425         /* y6 = w2 + (w6 >> 1)                                              */
426         /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
427         /*------------------------------------------------------------------*/
428         i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
429 
430         i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
431                         - (pi2_tmp_ptr[7] >> 1));
432 
433         i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
434 
435         i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
436                         - (pi2_tmp_ptr[3] >> 1));
437 
438         i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
439 
440         i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
441                         + (pi2_tmp_ptr[5] >> 1));
442 
443         i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
444 
445         i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
446                         + (pi2_tmp_ptr[1] >> 1));
447 
448         /*------------------------------------------------------------------*/
449         /* z0 = y0 + y6                                                     */
450         /* z1 = y1 + (y7 >> 2)                                              */
451         /* z2 = y2 + y4                                                     */
452         /* z3 = y3 + (y5 >> 2)                                              */
453         /* z4 = y2 - y4                                                     */
454         /* z5 = (y3 >> 2) - y5                                              */
455         /* z6 = y0 - y6                                                     */
456         /* z7 = y7 - (y1 >> 2)                                              */
457         /*------------------------------------------------------------------*/
458         i_z0 = i_y0 + i_y6;
459         i_z1 = i_y1 + (i_y7 >> 2);
460         i_z2 = i_y2 + i_y4;
461         i_z3 = i_y3 + (i_y5 >> 2);
462         i_z4 = i_y2 - i_y4;
463         i_z5 = (i_y3 >> 2) - i_y5;
464         i_z6 = i_y0 - i_y6;
465         i_z7 = i_y7 - (i_y1 >> 2);
466 
467         /*------------------------------------------------------------------*/
468         /* x0 = z0 + z7                                                     */
469         /* x1 = z2 + z5                                                     */
470         /* x2 = z4 + z3                                                     */
471         /* x3 = z6 + z1                                                     */
472         /* x4 = z6 - z1                                                     */
473         /* x5 = z4 - z3                                                     */
474         /* x6 = z2 - z5                                                     */
475         /* x7 = z0 - z7                                                     */
476         /*------------------------------------------------------------------*/
477         pi2_tmp_ptr[0] = i_z0 + i_z7;
478         pi2_tmp_ptr[1] = i_z2 + i_z5;
479         pi2_tmp_ptr[2] = i_z4 + i_z3;
480         pi2_tmp_ptr[3] = i_z6 + i_z1;
481         pi2_tmp_ptr[4] = i_z6 - i_z1;
482         pi2_tmp_ptr[5] = i_z4 - i_z3;
483         pi2_tmp_ptr[6] = i_z2 - i_z5;
484         pi2_tmp_ptr[7] = i_z0 - i_z7;
485 
486         /* move to the next row */
487         //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
488         pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
489     }
490 
491     /*--------------------------------------------------------------------*/
492     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
493     /*                                                                    */
494     /* Add the prediction and store it back to reconstructed frame buffer */
495     /* [Prediction buffer itself in this case]                            */
496     /*--------------------------------------------------------------------*/
497     pi2_tmp_ptr = pi2_tmp;
498     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
499     {
500         pu1_pred_ptr = pu1_pred;
501         pu1_out = pu1_out_ptr;
502         /*------------------------------------------------------------------*/
503         /* y0j = w0j + w4j                                                  */
504         /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
505         /* y2j = w0j -w4j                                                   */
506         /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
507         /* y4j = ( w2j >> 1 ) -w6j                                          */
508         /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
509         /* y6j = w2j + ( w6j >> 1 )                                         */
510         /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
511         /*------------------------------------------------------------------*/
512         i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
513 
514         i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
515                         - (pi2_tmp_ptr[56] >> 1);
516 
517         i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
518 
519         i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
520                         - (pi2_tmp_ptr[24] >> 1);
521 
522         i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
523 
524         i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
525                         + (pi2_tmp_ptr[40] >> 1);
526 
527         i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
528 
529         i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
530                         + (pi2_tmp_ptr[8] >> 1);
531 
532         /*------------------------------------------------------------------*/
533         /* z0j = y0j + y6j                                                  */
534         /* z1j = y1j + (y7j >> 2)                                           */
535         /* z2j = y2j + y4j                                                  */
536         /* z3j = y3j + (y5j >> 2)                                           */
537         /* z4j = y2j -y4j                                                   */
538         /* z5j = (y3j >> 2) -y5j                                            */
539         /* z6j = y0j -y6j                                                   */
540         /* z7j = y7j -(y1j >> 2)                                            */
541         /*------------------------------------------------------------------*/
542         i_z0 = i_y0 + i_y6;
543         i_z1 = i_y1 + (i_y7 >> 2);
544         i_z2 = i_y2 + i_y4;
545         i_z3 = i_y3 + (i_y5 >> 2);
546         i_z4 = i_y2 - i_y4;
547         i_z5 = (i_y3 >> 2) - i_y5;
548         i_z6 = i_y0 - i_y6;
549         i_z7 = i_y7 - (i_y1 >> 2);
550 
551         /*------------------------------------------------------------------*/
552         /* x0j = z0j + z7j                                                  */
553         /* x1j = z2j + z5j                                                  */
554         /* x2j = z4j + z3j                                                  */
555         /* x3j = z6j + z1j                                                  */
556         /* x4j = z6j -z1j                                                   */
557         /* x5j = z4j -z3j                                                   */
558         /* x6j = z2j -z5j                                                   */
559         /* x7j = z0j -z7j                                                   */
560         /*------------------------------------------------------------------*/
561         i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
562         *pu1_out = CLIP_U8(i_macro);
563         /* Change uc_recBuffer to Point to next element in the same column*/
564         pu1_pred_ptr += pred_strd;
565         pu1_out += out_strd;
566 
567         i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
568         *pu1_out = CLIP_U8(i_macro);
569         pu1_pred_ptr += pred_strd;
570         pu1_out += out_strd;
571 
572         i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
573         *pu1_out = CLIP_U8(i_macro);
574         pu1_pred_ptr += pred_strd;
575         pu1_out += out_strd;
576 
577         i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
578         *pu1_out = CLIP_U8(i_macro);
579         pu1_pred_ptr += pred_strd;
580         pu1_out += out_strd;
581 
582         i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
583         *pu1_out = CLIP_U8(i_macro);
584         pu1_pred_ptr += pred_strd;
585         pu1_out += out_strd;
586 
587         i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
588         *pu1_out = CLIP_U8(i_macro);
589         pu1_pred_ptr += pred_strd;
590         pu1_out += out_strd;
591 
592         i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
593         *pu1_out = CLIP_U8(i_macro);
594         pu1_pred_ptr += pred_strd;
595         pu1_out += out_strd;
596 
597         i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
598         *pu1_out = CLIP_U8(i_macro);
599 
600         pi2_tmp_ptr++;
601         pu1_out_ptr++;
602         pu1_pred++;
603     }
604 }
605 
606 /**
607 ********************************************************************************
608 *
609 * @brief This function reconstructs a 8x8 sub block from quantized residue and
610 *  prediction buffer, if only dc value is present
611 *
612 * @par Description:
613 *  The quantized residue is first inverse quantized, then inverse transformed.
614 *  This inverse transformed content is added to the prediction buffer to recon-
615 *  struct the end output
616 *
617 * @param[in] pi2_src
618 *  quantized 4x4 block
619 *
620 * @param[in] pu1_pred
621 *  prediction 4x4 block
622 *
623 * @param[out] pu1_out
624 *  reconstructed 4x4 block
625 *
626 * @param[in] pred_strd
627 *  Prediction buffer stride
628 *
629 * @param[in] out_strd
630 *  recon buffer Stride
631 *
632 * @param[in] pu2_iscal_mat
633 *  pointer to inverse scaling matrix
634 *
635 * @param[in] pu2_weigh_mat
636 *  pointer to weight matrix
637 *
638 * @param[in] u4_qp_div_6
639 *  Floor (qp/6)
640 *
641 * @param[in] pi2_tmp
642 *  temporary buffer of size 1*16. we dont need a bigger block since we reuse
643 *  the tmp for each block
644 *
645 * @param[in] iq_start_idx
646 *  UNUSED
647 *
648 * @param[in] pi2_dc_ld_addr
649 *  UNUSED
650 *
651 * @returns none
652 *
653 * @remarks none
654 *
655 *******************************************************************************
656 */
ih264_iquant_itrans_recon_8x8_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)657 void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
658                                       UWORD8 *pu1_pred,
659                                       UWORD8 *pu1_out,
660                                       WORD32 pred_strd,
661                                       WORD32 out_strd,
662                                       const UWORD16 *pu2_iscale_mat,
663                                       const UWORD16 *pu2_weigh_mat,
664                                       UWORD32 qp_div,
665                                       WORD16 *pi2_tmp,
666                                       WORD32 iq_start_idx,
667                                       WORD16 *pi2_dc_ld_addr)
668 {
669     UWORD8 *pu1_pred_ptr = pu1_pred;
670     UWORD8 *pu1_out_ptr = pu1_out;
671     WORD16 x, i, i_macro;
672     WORD32 q;
673     WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
674 
675     UNUSED(pi2_tmp);
676     UNUSED(iq_start_idx);
677     UNUSED(pi2_dc_ld_addr);
678     /*************************************************************/
679     /* Dequantization of coefficients. Will be replaced by SIMD  */
680     /* operations on platform. Note : DC coeff is not scaled     */
681     /*************************************************************/
682     q = pi2_src[0];
683     INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
684     i_macro = (q + 32) >> 6;
685     /* Perform Inverse transform */
686     /*--------------------------------------------------------------------*/
687     /* IDCT [ Horizontal transformation ]                                 */
688     /*--------------------------------------------------------------------*/
689     /*--------------------------------------------------------------------*/
690     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
691     /*                                                                    */
692     /* Add the prediction and store it back to reconstructed frame buffer */
693     /* [Prediction buffer itself in this case]                            */
694     /*--------------------------------------------------------------------*/
695     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
696     {
697         pu1_pred_ptr = pu1_pred;
698         pu1_out = pu1_out_ptr;
699 
700         x = i_macro + *pu1_pred_ptr;
701         *pu1_out = CLIP_U8(x);
702         /* Change uc_recBuffer to Point to next element in the same column*/
703         pu1_pred_ptr += pred_strd;
704         pu1_out += out_strd;
705 
706         x = i_macro + *pu1_pred_ptr;
707         *pu1_out = CLIP_U8(x);
708         pu1_pred_ptr += pred_strd;
709         pu1_out += out_strd;
710 
711         x = i_macro + *pu1_pred_ptr;
712         *pu1_out = CLIP_U8(x);
713         pu1_pred_ptr += pred_strd;
714         pu1_out += out_strd;
715 
716         x = i_macro + *pu1_pred_ptr;
717         *pu1_out = CLIP_U8(x);
718         pu1_pred_ptr += pred_strd;
719         pu1_out += out_strd;
720 
721         x = i_macro + *pu1_pred_ptr;
722         *pu1_out = CLIP_U8(x);
723         pu1_pred_ptr += pred_strd;
724         pu1_out += out_strd;
725 
726         x = i_macro + *pu1_pred_ptr;
727         *pu1_out = CLIP_U8(x);
728         pu1_pred_ptr += pred_strd;
729         pu1_out += out_strd;
730 
731         x = i_macro + *pu1_pred_ptr;
732         *pu1_out = CLIP_U8(x);
733         pu1_pred_ptr += pred_strd;
734         pu1_out += out_strd;
735 
736         x = i_macro + *pu1_pred_ptr;
737         *pu1_out = CLIP_U8(x);
738 
739         pu1_out_ptr++;
740         pu1_pred++;
741     }
742 }
743 
744 /**
745 ********************************************************************************
746 *
747 * @brief This function reconstructs a 4x4 sub block from quantized residue and
748 *  prediction buffer
749 *
750 * @par Description:
751 *  The quantized residue is first inverse quantized, then inverse transformed.
752 *  This inverse transformed content is added to the prediction buffer to recon-
753 *  struct the end output
754 *
755 * @param[in] pi2_src
756 *  quantized 4x4 block
757 *
758 * @param[in] pu1_pred
759 *  prediction 4x4 block
760 *
761 * @param[out] pu1_out
762 *  reconstructed 4x4 block
763 *
764 * @param[in] pred_strd
765 *  Prediction buffer stride
766 *
767 * @param[in] out_strd
768 *  recon buffer Stride
769 *
770 * @param[in] pu2_iscal_mat
771 *  pointer to inverse scaling matrix
772 *
773 * @param[in] pu2_weigh_mat
774 *  pointer to weight matrix
775 *
776 * @param[in] u4_qp_div_6
777 *  Floor (qp/6)
778 *
779 * @param[in] pi2_tmp
780 *  temporary buffer of size 1*16
781 *
782 * @param[in] pi2_dc_src
783 *  Address to load DC value of the 4x4 blk
784 *
785 * @returns none
786 *
787 * @remarks none
788 *
789 *******************************************************************************
790 */
ih264_iquant_itrans_recon_chroma_4x4(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)791 void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
792                                           UWORD8 *pu1_pred,
793                                           UWORD8 *pu1_out,
794                                           WORD32 pred_strd,
795                                           WORD32 out_strd,
796                                           const UWORD16 *pu2_iscal_mat,
797                                           const UWORD16 *pu2_weigh_mat,
798                                           UWORD32 u4_qp_div_6,
799                                           WORD16 *pi2_tmp,
800                                           WORD16 *pi2_dc_src)
801 {
802     WORD16 *pi2_src_ptr = pi2_src;
803     WORD16 *pi2_tmp_ptr = pi2_tmp;
804     UWORD8 *pu1_pred_ptr = pu1_pred;
805     UWORD8 *pu1_out_ptr = pu1_out;
806     WORD16 x0, x1, x2, x3, i;
807     WORD32 q0, q1, q2, q3;
808     WORD16 i_macro;
809     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
810 
811     /* inverse quant */
812     /* horizontal inverse transform */
813     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
814     {
815         if(i == 0)
816         {
817             q0 = pi2_dc_src[0];
818         }
819         else
820         {
821             q0 = pi2_src_ptr[0];
822             INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
823         }
824 
825         q2 = pi2_src_ptr[2];
826         INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
827 
828         x0 = q0 + q2;
829         x1 = q0 - q2;
830 
831         q1 = pi2_src_ptr[1];
832         INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
833 
834         q3 = pi2_src_ptr[3];
835         INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
836 
837         x2 = (q1 >> 1) - q3;
838         x3 = q1 + (q3 >> 1);
839 
840         pi2_tmp_ptr[0] = x0 + x3;
841         pi2_tmp_ptr[1] = x1 + x2;
842         pi2_tmp_ptr[2] = x1 - x2;
843         pi2_tmp_ptr[3] = x0 - x3;
844 
845         pi2_src_ptr += SUB_BLK_WIDTH_4x4;
846         pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
847         pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
848         pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
849     }
850 
851     /* vertical inverse transform */
852     pi2_tmp_ptr = pi2_tmp;
853     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
854     {
855         pu1_pred_ptr = pu1_pred;
856         pu1_out = pu1_out_ptr;
857 
858         x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
859         x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
860         x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
861         x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
862 
863         /* inverse prediction */
864         i_macro = x0 + x3;
865         i_macro = ((i_macro + 32) >> 6);
866         i_macro += *pu1_pred_ptr;
867         *pu1_out = CLIP_U8(i_macro);
868         pu1_pred_ptr += pred_strd;
869         pu1_out += out_strd;
870 
871         i_macro = x1 + x2;
872         i_macro = ((i_macro + 32) >> 6);
873         i_macro += *pu1_pred_ptr;
874         *pu1_out = CLIP_U8(i_macro);
875         pu1_pred_ptr += pred_strd;
876         pu1_out += out_strd;
877 
878         i_macro = x1 - x2;
879         i_macro = ((i_macro + 32) >> 6);
880         i_macro += *pu1_pred_ptr;
881         *pu1_out = CLIP_U8(i_macro);
882         pu1_pred_ptr += pred_strd;
883         pu1_out += out_strd;
884 
885         i_macro = x0 - x3;
886         i_macro = ((i_macro + 32) >> 6);
887         i_macro += *pu1_pred_ptr;
888         *pu1_out = CLIP_U8(i_macro);
889 
890         pi2_tmp_ptr++;
891         pu1_out_ptr += 2; // Interleaved store for output
892         pu1_pred += 2; // Interleaved load for pred buffer
893     }
894 }
895 
896 /**
897 ********************************************************************************
898 *
899 * @brief This function reconstructs a 4x4 sub block from quantized residue and
900 *  prediction buffer if only dc value is present for residue
901 *
902 * @par Description:
903 *  The quantized residue is first inverse quantized,
904 *  This inverse quantized content is added to the prediction buffer to recon-
905 *  struct the end output
906 *
907 * @param[in] pi2_src
908 *  quantized dc coefficient
909 *
910 * @param[in] pu1_pred
911 *  prediction 4x4 block in interleaved format
912 *
913 * @param[in] pred_strd,
914 *  Prediction buffer stride in interleaved format
915 *
916 * @param[in] out_strd
917 *  recon buffer Stride
918 *
919 * @param[in] pu2_iscal_mat
920 *  pointer to inverse scaling matrix
921 *
922 * @param[in] pu2_weigh_mat
923 *  pointer to weight matrix
924 *
925 * @param[in] u4_qp_div_6
926 *  Floor (qp/6)
927 *
928 * @param[in] pi2_tmp
929 *  temporary buffer of size 1*16
930 *
931 * @param[in] pi2_dc_src
932 *  Address to load DC value of the 4x4 blk
933 *
934 * @returns none
935 *
936 * @remarks none
937 *
938 *******************************************************************************
939 */
ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)940 void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
941                                              UWORD8 *pu1_pred,
942                                              UWORD8 *pu1_out,
943                                              WORD32 pred_strd,
944                                              WORD32 out_strd,
945                                              const UWORD16 *pu2_iscal_mat,
946                                              const UWORD16 *pu2_weigh_mat,
947                                              UWORD32 u4_qp_div_6,
948                                              WORD16 *pi2_tmp,
949                                              WORD16 *pi2_dc_src)
950 {
951     UWORD8 *pu1_pred_ptr = pu1_pred;
952     UWORD8 *pu1_out_ptr = pu1_out;
953     WORD32 q0;
954     WORD16 x, i_macro, i;
955 
956     UNUSED(pi2_src);
957     UNUSED(pu2_iscal_mat);
958     UNUSED(pu2_weigh_mat);
959     UNUSED(u4_qp_div_6);
960     UNUSED(pi2_tmp);
961 
962     q0 = pi2_dc_src[0];    // Restoring dc value for intra case3
963     i_macro = ((q0 + 32) >> 6);
964 
965     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
966     {
967         pu1_pred_ptr = pu1_pred;
968         pu1_out = pu1_out_ptr;
969 
970         /* inverse prediction */
971         x = i_macro + *pu1_pred_ptr;
972         *pu1_out =  CLIP_U8(x);
973         pu1_pred_ptr += pred_strd;
974         pu1_out += out_strd;
975 
976         x = i_macro + *pu1_pred_ptr;
977         *pu1_out = CLIP_U8(x);
978         pu1_pred_ptr += pred_strd;
979         pu1_out += out_strd;
980 
981         x = i_macro + *pu1_pred_ptr;
982         *pu1_out = CLIP_U8(x);
983         pu1_pred_ptr += pred_strd;
984         pu1_out += out_strd;
985 
986         x = i_macro + *pu1_pred_ptr;
987         *pu1_out = CLIP_U8(x);
988 
989         pu1_out_ptr+=2;
990         pu1_pred+=2;
991     }
992 }
993