xref: /aosp_15_r20/external/libavc/decoder/svc/isvcd_iquant_itrans_residual_recon.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 /**
21  *******************************************************************************
22  * @file
23  *  isvcd_iquant_itrans_residual_recon.c
24  *
25  * @brief
26  *  Contains definitions of functions for H.264 inverse quantization, inverse
27  *  transformation, residual accumulation and reconstruction
28  *
29  * @author
30  *  Kishore
31  *
32  *  @par List of Functions:
33  *  - isvcd_iquant_itrans_residual_recon_4x4()
34  *  - isvcd_iquant_itrans_residual_recon_8x8()
35  *  - isvcd_iquant_itrans_residual_recon_4x4_dc()
36  *  - isvcd_iquant_itrans_residual_recon_8x8_dc()
37  *  - isvcd_iquant_itrans_residual_recon_chroma_4x4()
38  *  - isvcd_iquant_itrans_residual_recon_chroma_4x4_dc()
39  *
40  * @remarks
41  *  None
42  *
43  *******************************************************************************
44  */
45 
46 /* User include files */
47 #include "ih264_typedefs.h"
48 #include "ih264_defs.h"
49 #include "ih264_trans_macros.h"
50 #include "ih264_macros.h"
51 #include "ih264_platform_macros.h"
52 #include "ih264_trans_data.h"
53 #include "ih264_size_defs.h"
54 #include "ih264_structs.h"
55 #include "isvcd_iquant_itrans_residual_recon.h"
56 
57 /*****************************************************************************/
58 /*                                                                           */
59 /*  Function Name : isvcd_iquant_itrans_residual_recon_4x4                    */
60 /*                                                                           */
61 /*  Description   : this function computes the recon output from the         */
62 /*                  IQ+IT+RESD                                               */
63 /*                                                                           */
64 /*  Inputs        :                                                          */
65 /*  Globals       : none                                                     */
66 /*  Processing    :                                                          */
67 /*                                                                           */
68 /*  Outputs       : recon samples written to pu1_out                         */
69 /*  Returns       : i4_nnz (0 or 1, non-zero sample flag)                    */
70 /*                                                                           */
71 /*  Issues        : none                                                     */
72 /*                                                                           */
73 /*  Revision History:                                                        */
74 /*                                                                           */
75 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
76 /*         25 11 2021   Kishore               creation                       */
77 /*                                                                           */
78 /*****************************************************************************/
79 
/**
 * Inverse-quantise and inverse-transform one 4x4 block, accumulate the result
 * with the residual read from pi2_rsd, add the prediction and store the
 * clipped samples.
 *
 * Processing:
 *  1. Every coefficient of pi2_src is scaled with INV_QUANT and the butterfly
 *     of each group of four consecutive coefficients is written to pi2_tmp.
 *  2. For each of the four columns of pi2_tmp the second butterfly is applied,
 *     the value is rounded with (x + 32) >> 6, added to the co-located entry
 *     of pi2_rsd and passed through CLIP_RSD, then added to the co-located
 *     prediction sample and passed through CLIP_U8 before being stored.
 *
 * @param pi2_src         16 quantised coefficients (read only)
 * @param pu1_pred        prediction samples, rows pred_strd apart
 * @param pi2_rsd         residual samples to accumulate, rows rsd_strd apart
 *                        (read only in this function)
 * @param pu1_out         destination samples, rows out_strd apart
 * @param pred_strd       row stride of pu1_pred, in elements
 * @param rsd_strd        row stride of pi2_rsd, in elements
 * @param out_strd        row stride of pu1_out, in elements
 * @param pu2_iscal_mat   16 scale factors handed to INV_QUANT
 * @param pu2_weigh_mat   16 weights handed to INV_QUANT
 * @param u4_qp_div_6     shift control handed to INV_QUANT; values below 4
 *                        select a rounding factor of 1 << (3 - u4_qp_div_6)
 *                        (presumably QP / 6 - confirm against callers)
 * @param pi2_tmp         scratch buffer holding 16 WORD16 values
 * @param iq_start_idx    when 1, the DC coefficient is taken from
 *                        pi2_dc_ld_addr[0] instead of the dequantised value
 * @param pi2_dc_ld_addr  DC value; dereferenced only when iq_start_idx == 1
 *
 * @return 1 if any of the 16 accumulated (post CLIP_RSD) samples is non-zero,
 *         0 otherwise.
 */
WORD32 isvcd_iquant_itrans_residual_recon_4x4(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
                                              UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
                                              WORD32 out_strd, const UWORD16 *pu2_iscal_mat,
                                              const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
                                              WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                              WORD16 *pi2_dc_ld_addr)
{
    WORD32 i4_nnz = 0;
    WORD16 *pi2_src_ptr = pi2_src;
    WORD16 *pi2_tmp_ptr = pi2_tmp;
    WORD16 x0, x1, x2, x3, i;
    WORD32 q0, q1, q2, q3;
    WORD32 row;
    WORD16 ai2_col[4];
    WORD16 i_macro;
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;

    /* inverse quant */
    /* horizontal inverse transform */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        q0 = pi2_src_ptr[0];
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
        if(i == 0 && iq_start_idx == 1)
        {
            q0 = pi2_dc_ld_addr[0]; /* Restoring dc value for intra case */
        }

        q2 = pi2_src_ptr[2];
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);

        x0 = q0 + q2;
        x1 = q0 - q2;

        q1 = pi2_src_ptr[1];
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);

        q3 = pi2_src_ptr[3];
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);

        x2 = (q1 >> 1) - q3;
        x3 = q1 + (q3 >> 1);

        pi2_tmp_ptr[0] = x0 + x3;
        pi2_tmp_ptr[1] = x1 + x2;
        pi2_tmp_ptr[2] = x1 - x2;
        pi2_tmp_ptr[3] = x0 - x3;

        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
    }

    /* vertical inverse transform, one output column per iteration */
    pi2_tmp_ptr = pi2_tmp;
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);

        /* the four samples of this column, top to bottom */
        ai2_col[0] = x0 + x3;
        ai2_col[1] = x1 + x2;
        ai2_col[2] = x1 - x2;
        ai2_col[3] = x0 - x3;

        /* inverse prediction */
        for(row = 0; row < 4; row++)
        {
            i_macro = ai2_col[row];
            i_macro = ((i_macro + 32) >> 6);
            i_macro = CLIP_RSD(i_macro + (pi2_rsd[i + row * rsd_strd]));
            /* The flag is sampled after the residual has been accumulated, */
            /* identically for every row, so that it describes the value    */
            /* that is actually added to the prediction.                    */
            i4_nnz |= !!i_macro;
            i_macro += pu1_pred[i + row * pred_strd];
            pu1_out[i + row * out_strd] = CLIP_U8(i_macro);
        }

        pi2_tmp_ptr++;
    }
    return i4_nnz;
}
191 
192 /*****************************************************************************/
193 /*                                                                           */
194 /*  Function Name : isvcd_iquant_itrans_residual_recon_4x4_dc                 */
195 /*                                                                           */
196 /*  Description   : this function computes the recon output from the         */
197 /*                  IQ+IT+RESD                                               */
198 /*                                                                           */
199 /*  Inputs        :                                                          */
200 /*  Globals       : none                                                     */
201 /*  Processing    :                                                          */
202 /*                                                                           */
203 /*  Outputs       : recon samples written to pu1_out                         */
204 /*  Returns       : i4_nnz (0 or 1, non-zero sample flag)                    */
205 /*                                                                           */
206 /*  Issues        : none                                                     */
207 /*                                                                           */
208 /*  Revision History:                                                        */
209 /*                                                                           */
210 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
211 /*         25 11 2021   Kishore               creation                       */
212 /*                                                                           */
213 /*****************************************************************************/
214 
/**
 * DC-only variant of the 4x4 inverse quant / inverse transform / residual
 * accumulation / reconstruction.
 *
 * A single term, (dc + 32) >> 6, is added to each of the 16 residual samples
 * read from pi2_rsd. Every sum goes through CLIP_RSD, is added to the
 * co-located prediction sample, goes through CLIP_U8 and is stored to pu1_out.
 *
 * @param pi2_src         coefficient buffer; only pi2_src[0] is read, and
 *                        only when iq_start_idx == 0
 * @param pu1_pred        prediction samples, rows pred_strd apart
 * @param pi2_rsd         residual samples to accumulate, rows rsd_strd apart
 * @param pu1_out         destination samples, rows out_strd apart
 * @param pred_strd       row stride of pu1_pred, in elements
 * @param rsd_strd        row stride of pi2_rsd, in elements
 * @param out_strd        row stride of pu1_out, in elements
 * @param pu2_iscal_mat   scale factors; only entry 0 is used
 * @param pu2_weigh_mat   weights; only entry 0 is used
 * @param u4_qp_div_6     shift control handed to INV_QUANT
 * @param pi2_tmp         unused
 * @param iq_start_idx    0: dequantise pi2_src[0]; otherwise take the DC
 *                        value from pi2_dc_ld_addr[0] as is
 * @param pi2_dc_ld_addr  DC value; dereferenced only when iq_start_idx != 0
 *
 * @return 1 if any of the 16 accumulated (post CLIP_RSD) samples is non-zero,
 *         0 otherwise.
 */
WORD32 isvcd_iquant_itrans_residual_recon_4x4_dc(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
                                                 UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
                                                 WORD32 out_strd, const UWORD16 *pu2_iscal_mat,
                                                 const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
                                                 WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                                 WORD16 *pi2_dc_ld_addr)
{
    WORD32 i4_any_nonzero = 0;
    WORD32 i4_dc;
    WORD32 i4_col, i4_line;
    WORD16 i2_dc_term;
    WORD16 i2_sample;
    WORD16 i2_round = 0;

    UNUSED(pi2_tmp);

    if(u4_qp_div_6 < 4)
    {
        i2_round = 1 << (3 - u4_qp_div_6);
    }

    if(iq_start_idx != 0)
    {
        /* DC value supplied by the caller, used without dequantisation */
        i4_dc = pi2_dc_ld_addr[0];
    }
    else
    {
        i4_dc = pi2_src[0];
        INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, i2_round, 4);
    }
    i2_dc_term = ((i4_dc + 32) >> 6);

    /* walk the block column by column, four lines per column */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        for(i4_line = 0; i4_line < 4; i4_line++)
        {
            i2_sample = CLIP_RSD(i2_dc_term + (pi2_rsd[i4_col + i4_line * rsd_strd]));
            i4_any_nonzero |= !!i2_sample;
            i2_sample += pu1_pred[i4_col + i4_line * pred_strd];
            pu1_out[i4_col + i4_line * out_strd] = CLIP_U8(i2_sample);
        }
    }
    return i4_any_nonzero;
}
283 
284 /*****************************************************************************/
285 /*                                                                           */
286 /*  Function Name : isvcd_iquant_itrans_residual_recon_chroma_4x4             */
287 /*                                                                           */
288 /*  Description   : this function computes the recon output from the         */
289 /*                  IQ+IT+RESD                                               */
290 /*                                                                           */
291 /*  Inputs        :                                                          */
292 /*  Globals       : none                                                     */
293 /*  Processing    :                                                          */
294 /*                                                                           */
295 /*  Outputs       : recon samples written to pu1_out                         */
296 /*  Returns       : none                                                     */
297 /*                                                                           */
298 /*  Issues        : none                                                     */
299 /*                                                                           */
300 /*  Revision History:                                                        */
301 /*                                                                           */
302 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
303 /*         25 11 2021   Kishore               creation                       */
304 /*                                                                           */
305 /*****************************************************************************/
306 
/**
 * Inverse-quantise and inverse-transform one 4x4 chroma block, accumulate it
 * with the residual read from pi2_rsd, add the prediction and store the
 * clipped samples.
 *
 * The DC coefficient is taken from pi2_dc_src[0] without dequantisation; the
 * other 15 coefficients of pi2_src are scaled with INV_QUANT. Prediction,
 * residual and output are addressed at every second element within a line
 * (interleaved layout), with lines pred_strd / rsd_strd / out_strd apart.
 *
 * @param pi2_src         16 quantised coefficients (entry 0 is not used)
 * @param pu1_pred        prediction samples, interleaved
 * @param pi2_rsd         residual samples to accumulate, interleaved
 *                        (read only in this function)
 * @param pu1_out         destination samples, interleaved
 * @param pred_strd       line stride of pu1_pred, in elements
 * @param rsd_strd        line stride of pi2_rsd, in elements
 * @param out_strd        line stride of pu1_out, in elements
 * @param pu2_iscal_mat   16 scale factors handed to INV_QUANT
 * @param pu2_weigh_mat   16 weights handed to INV_QUANT
 * @param u4_qp_div_6     shift control handed to INV_QUANT; values below 4
 *                        select a rounding factor of 1 << (3 - u4_qp_div_6)
 * @param pi2_tmp         scratch buffer holding 16 WORD16 values
 * @param pi2_dc_src      DC value used in place of pi2_src[0]
 */
void isvcd_iquant_itrans_residual_recon_chroma_4x4(
    WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd, UWORD8 *pu1_out, WORD32 pred_strd,
    WORD32 rsd_strd, WORD32 out_strd, const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
    UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
{
    WORD32 i4_grp, i4_col, i4_line;
    WORD32 i4_c0, i4_c1, i4_c2, i4_c3;
    WORD16 i2_even_sum, i2_even_diff, i2_odd_diff, i2_odd_sum;
    WORD16 ai2_lines[4];
    WORD16 i2_val;
    WORD16 i2_round = 0;

    if(u4_qp_div_6 < 4)
    {
        i2_round = 1 << (3 - u4_qp_div_6);
    }

    /* inverse quant + horizontal inverse transform, four coefficients a time */
    for(i4_grp = 0; i4_grp < SUB_BLK_WIDTH_4x4; i4_grp++)
    {
        const WORD32 i4_base = i4_grp * SUB_BLK_WIDTH_4x4;
        const WORD16 *pi2_coef = pi2_src + i4_base;
        const UWORD16 *pu2_scale = pu2_iscal_mat + i4_base;
        const UWORD16 *pu2_weight = pu2_weigh_mat + i4_base;
        WORD16 *pi2_dst = pi2_tmp + i4_base;

        if(i4_grp != 0)
        {
            i4_c0 = pi2_coef[0];
            INV_QUANT(i4_c0, pu2_scale[0], pu2_weight[0], u4_qp_div_6, i2_round, 4);
        }
        else
        {
            /* DC comes from the separate DC buffer and is not rescaled here */
            i4_c0 = pi2_dc_src[0];
        }

        i4_c1 = pi2_coef[1];
        INV_QUANT(i4_c1, pu2_scale[1], pu2_weight[1], u4_qp_div_6, i2_round, 4);

        i4_c2 = pi2_coef[2];
        INV_QUANT(i4_c2, pu2_scale[2], pu2_weight[2], u4_qp_div_6, i2_round, 4);

        i4_c3 = pi2_coef[3];
        INV_QUANT(i4_c3, pu2_scale[3], pu2_weight[3], u4_qp_div_6, i2_round, 4);

        i2_even_sum = i4_c0 + i4_c2;
        i2_even_diff = i4_c0 - i4_c2;
        i2_odd_diff = (i4_c1 >> 1) - i4_c3;
        i2_odd_sum = i4_c1 + (i4_c3 >> 1);

        pi2_dst[0] = i2_even_sum + i2_odd_sum;
        pi2_dst[1] = i2_even_diff + i2_odd_diff;
        pi2_dst[2] = i2_even_diff - i2_odd_diff;
        pi2_dst[3] = i2_even_sum - i2_odd_sum;
    }

    /* vertical inverse transform + reconstruction, one column per iteration */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        const WORD16 *pi2_vert = pi2_tmp + i4_col;
        /* samples of one plane sit two elements apart (interleaved layout) */
        const WORD32 i4_px = 2 * i4_col;

        i2_even_sum = (pi2_vert[0] + pi2_vert[8]);
        i2_even_diff = (pi2_vert[0] - pi2_vert[8]);
        i2_odd_diff = (pi2_vert[4] >> 1) - pi2_vert[12];
        i2_odd_sum = pi2_vert[4] + (pi2_vert[12] >> 1);

        ai2_lines[0] = i2_even_sum + i2_odd_sum;
        ai2_lines[1] = i2_even_diff + i2_odd_diff;
        ai2_lines[2] = i2_even_diff - i2_odd_diff;
        ai2_lines[3] = i2_even_sum - i2_odd_sum;

        /* inverse prediction */
        for(i4_line = 0; i4_line < 4; i4_line++)
        {
            i2_val = ai2_lines[i4_line];
            i2_val = ((i2_val + 32) >> 6);
            i2_val = CLIP_RSD(i2_val + (pi2_rsd[i4_px + i4_line * rsd_strd]));
            i2_val += pu1_pred[i4_px + i4_line * pred_strd];
            pu1_out[i4_px + i4_line * out_strd] = CLIP_U8(i2_val);
        }
    }
}
415 
416 /*****************************************************************************/
417 /*                                                                           */
418 /*  Function Name : isvcd_iquant_itrans_residual_recon_chroma_4x4_dc          */
419 /*                                                                           */
420 /*  Description   : this function computes the recon output from the         */
421 /*                  IQ+IT+RESD                                               */
422 /*                                                                           */
423 /*  Inputs        :                                                          */
424 /*  Globals       : none                                                     */
425 /*  Processing    :                                                          */
426 /*                                                                           */
427 /*  Outputs       : recon samples written to pu1_out                         */
428 /*  Returns       : none                                                     */
429 /*                                                                           */
430 /*  Issues        : none                                                     */
431 /*                                                                           */
432 /*  Revision History:                                                        */
433 /*                                                                           */
434 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
435 /*         25 11 2021   Kishore               creation                       */
436 /*                                                                           */
437 /*****************************************************************************/
438 
/**
 * DC-only variant of the 4x4 chroma inverse transform / residual accumulation
 * / reconstruction.
 *
 * The term (pi2_dc_src[0] + 32) >> 6 is added to each of the 16 residual
 * samples read from pi2_rsd. Every sum goes through CLIP_RSD, is added to the
 * co-located prediction sample, goes through CLIP_U8 and is stored to
 * pu1_out. Within a line, samples are addressed two elements apart
 * (interleaved layout).
 *
 * @param pi2_src         unused
 * @param pu1_pred        prediction samples, interleaved
 * @param pi2_rsd         residual samples to accumulate, interleaved
 * @param pu1_out         destination samples, interleaved
 * @param pred_strd       line stride of pu1_pred, in elements
 * @param rsd_strd        line stride of pi2_rsd, in elements
 * @param out_strd        line stride of pu1_out, in elements
 * @param pu2_iscal_mat   unused
 * @param pu2_weigh_mat   unused
 * @param u4_qp_div_6     unused
 * @param pi2_tmp         unused
 * @param pi2_dc_src      DC value, used without dequantisation
 */
void isvcd_iquant_itrans_residual_recon_chroma_4x4_dc(
    WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd, UWORD8 *pu1_out, WORD32 pred_strd,
    WORD32 rsd_strd, WORD32 out_strd, const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
    UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
{
    WORD32 i4_dc;
    WORD32 i4_col, i4_line;
    WORD16 i2_dc_term;
    WORD16 i2_sample;

    UNUSED(pi2_src);
    UNUSED(pu2_iscal_mat);
    UNUSED(pu2_weigh_mat);
    UNUSED(u4_qp_div_6);
    UNUSED(pi2_tmp);

    /* DC value supplied by the caller; rounded and scaled down once */
    i4_dc = pi2_dc_src[0];
    i2_dc_term = ((i4_dc + 32) >> 6);

    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        /* samples of one plane sit two elements apart (interleaved layout) */
        const WORD32 i4_px = 2 * i4_col;

        /* inverse prediction */
        for(i4_line = 0; i4_line < 4; i4_line++)
        {
            i2_sample = CLIP_RSD(i2_dc_term + (pi2_rsd[i4_px + i4_line * rsd_strd]));
            i2_sample += pu1_pred[i4_px + i4_line * pred_strd];
            pu1_out[i4_px + i4_line * out_strd] = CLIP_U8(i2_sample);
        }
    }
}
495 
496 /*****************************************************************************/
497 /*                                                                           */
498 /*  Function Name : isvcd_iquant_itrans_residual_recon_8x8                    */
499 /*                                                                           */
500 /*  Description   : this function computes the recon output from the         */
501 /*                  IQ+IT+RESD                                               */
502 /*                                                                           */
503 /*  Inputs        :                                                          */
504 /*  Globals       : none                                                     */
505 /*  Processing    :                                                          */
506 /*                                                                           */
507 /*  Outputs       : i4_nnz                                                   */
508 /*  Returns       : none                                                     */
509 /*                                                                           */
510 /*  Issues        : none                                                     */
511 /*                                                                           */
512 /*  Revision History:                                                        */
513 /*                                                                           */
514 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
515 /*         25 11 2021   Kishore               creation                       */
516 /*                                                                           */
517 /*****************************************************************************/
518 
isvcd_iquant_itrans_residual_recon_8x8(WORD16 * pi2_src,UWORD8 * pu1_pred,WORD16 * pi2_rsd,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 rsd_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)519 WORD32 isvcd_iquant_itrans_residual_recon_8x8(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
520                                               UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
521                                               WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
522                                               const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
523                                               WORD16 *pi2_tmp, WORD32 iq_start_idx,
524                                               WORD16 *pi2_dc_ld_addr)
525 {
526     WORD32 i4_nnz = 0, i4_nnz_H = 0, i4_nnz_L = 0;
527     WORD32 i;
528     WORD16 *pi2_tmp_ptr = pi2_tmp;
529     UWORD8 *pu1_pred_ptr = pu1_pred;
530     WORD16 *pi2_rsd_ptr = pi2_rsd;
531     UWORD8 *pu1_out_ptr = pu1_out;
532     WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
533     WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
534     WORD16 i_macro;
535     WORD32 q;
536     WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
537     UNUSED(iq_start_idx);
538     UNUSED(pi2_dc_ld_addr);
539     /*************************************************************/
540     /* De quantization of coefficients. Will be replaced by SIMD */
541     /* operations on platform. Note : DC coeff is not scaled     */
542     /*************************************************************/
543     for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
544     {
545         q = pi2_src[i];
546         INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
547         pi2_tmp_ptr[i] = q;
548     }
549     /* Perform Inverse transform */
550     /*--------------------------------------------------------------------*/
551     /* IDCT [ Horizontal transformation ]                                 */
552     /*--------------------------------------------------------------------*/
553     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
554     {
555         /*------------------------------------------------------------------*/
556         /* y0 = w0 + w4                                                     */
557         /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
558         /* y2 = w0 - w4                                                     */
559         /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
560         /* y4 = (w2 >> 1) - w6                                              */
561         /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
562         /* y6 = w2 + (w6 >> 1)                                              */
563         /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
564         /*------------------------------------------------------------------*/
565         i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]);
566 
567         i_y1 =
568             ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1));
569 
570         i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]);
571 
572         i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1));
573 
574         i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6]);
575 
576         i_y5 =
577             ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1));
578 
579         i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
580 
581         i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1));
582 
583         /*------------------------------------------------------------------*/
584         /* z0 = y0 + y6                                                     */
585         /* z1 = y1 + (y7 >> 2)                                              */
586         /* z2 = y2 + y4                                                     */
587         /* z3 = y3 + (y5 >> 2)                                              */
588         /* z4 = y2 - y4                                                     */
589         /* z5 = (y3 >> 2) - y5                                              */
590         /* z6 = y0 - y6                                                     */
591         /* z7 = y7 - (y1 >> 2)                                              */
592         /*------------------------------------------------------------------*/
593         i_z0 = i_y0 + i_y6;
594         i_z1 = i_y1 + (i_y7 >> 2);
595         i_z2 = i_y2 + i_y4;
596         i_z3 = i_y3 + (i_y5 >> 2);
597         i_z4 = i_y2 - i_y4;
598         i_z5 = (i_y3 >> 2) - i_y5;
599         i_z6 = i_y0 - i_y6;
600         i_z7 = i_y7 - (i_y1 >> 2);
601 
602         /*------------------------------------------------------------------*/
603         /* x0 = z0 + z7                                                     */
604         /* x1 = z2 + z5                                                     */
605         /* x2 = z4 + z3                                                     */
606         /* x3 = z6 + z1                                                     */
607         /* x4 = z6 - z1                                                     */
608         /* x5 = z4 - z3                                                     */
609         /* x6 = z2 - z5                                                     */
610         /* x7 = z0 - z7                                                     */
611         /*------------------------------------------------------------------*/
612         pi2_tmp_ptr[0] = i_z0 + i_z7;
613         pi2_tmp_ptr[1] = i_z2 + i_z5;
614         pi2_tmp_ptr[2] = i_z4 + i_z3;
615         pi2_tmp_ptr[3] = i_z6 + i_z1;
616         pi2_tmp_ptr[4] = i_z6 - i_z1;
617         pi2_tmp_ptr[5] = i_z4 - i_z3;
618         pi2_tmp_ptr[6] = i_z2 - i_z5;
619         pi2_tmp_ptr[7] = i_z0 - i_z7;
620 
621         /* move to the next row */
622         pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
623     }
624     /*--------------------------------------------------------------------*/
625     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
626     /*                                                                    */
627     /* Add the prediction and store it back to reconstructed frame buffer */
628     /* [Prediction buffer itself in this case]                            */
629     /*--------------------------------------------------------------------*/
630 
631     pi2_tmp_ptr = pi2_tmp;
632     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
633     {
634         pu1_pred_ptr = pu1_pred;
635         pi2_rsd_ptr = pi2_rsd;
636         pu1_out = pu1_out_ptr;
637         /*------------------------------------------------------------------*/
638         /* y0j = w0j + w4j                                                  */
639         /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
640         /* y2j = w0j -w4j                                                   */
641         /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
642         /* y4j = ( w2j >> 1 ) -w6j                                          */
643         /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
644         /* y6j = w2j + ( w6j >> 1 )                                         */
645         /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
646         /*------------------------------------------------------------------*/
647         i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
648 
649         i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] -
650                (pi2_tmp_ptr[56] >> 1);
651 
652         i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
653 
654         i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1);
655 
656         i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
657 
658         i_y5 =
659             (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1);
660 
661         i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
662 
663         i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1);
664 
665         /*------------------------------------------------------------------*/
666         /* z0j = y0j + y6j                                                  */
667         /* z1j = y1j + (y7j >> 2)                                           */
668         /* z2j = y2j + y4j                                                  */
669         /* z3j = y3j + (y5j >> 2)                                           */
670         /* z4j = y2j -y4j                                                   */
671         /* z5j = (y3j >> 2) -y5j                                            */
672         /* z6j = y0j -y6j                                                   */
673         /* z7j = y7j -(y1j >> 2)                                            */
674         /*------------------------------------------------------------------*/
675         i_z0 = i_y0 + i_y6;
676         i_z1 = i_y1 + (i_y7 >> 2);
677         i_z2 = i_y2 + i_y4;
678         i_z3 = i_y3 + (i_y5 >> 2);
679         i_z4 = i_y2 - i_y4;
680         i_z5 = (i_y3 >> 2) - i_y5;
681         i_z6 = i_y0 - i_y6;
682         i_z7 = i_y7 - (i_y1 >> 2);
683 
684         /*------------------------------------------------------------------*/
685         /* x0j = z0j + z7j                                                  */
686         /* x1j = z2j + z5j                                                  */
687         /* x2j = z4j + z3j                                                  */
688         /* x3j = z6j + z1j                                                  */
689         /* x4j = z6j -z1j                                                   */
690         /* x5j = z4j -z3j                                                   */
691         /* x6j = z2j -z5j                                                   */
692         /* x7j = z0j -z7j                                                   */
693         /*------------------------------------------------------------------*/
694         i_macro = CLIP_RSD(((i_z0 + i_z7 + 32) >> 6) + (*pi2_rsd_ptr));
695         i4_nnz_H |= !!i_macro;
696         i_macro += *pu1_pred_ptr;
697         *pu1_out = CLIP_U8(i_macro);
698         /* Change uc_recBuffer to Point to next element in the same column*/
699         pu1_pred_ptr += pred_strd;
700         pi2_rsd_ptr += rsd_strd;
701         pu1_out += out_strd;
702 
703         i_macro = CLIP_RSD(((i_z2 + i_z5 + 32) >> 6) + (*pi2_rsd_ptr));
704         i4_nnz_H |= !!i_macro;
705         i_macro += *pu1_pred_ptr;
706         *pu1_out = CLIP_U8(i_macro);
707         pu1_pred_ptr += pred_strd;
708         pi2_rsd_ptr += rsd_strd;
709         pu1_out += out_strd;
710 
711         i_macro = CLIP_RSD(((i_z4 + i_z3 + 32) >> 6) + (*pi2_rsd_ptr));
712         i4_nnz_H |= !!i_macro;
713         i_macro += *pu1_pred_ptr;
714         *pu1_out = CLIP_U8(i_macro);
715         pu1_pred_ptr += pred_strd;
716         pi2_rsd_ptr += rsd_strd;
717         pu1_out += out_strd;
718 
719         i_macro = CLIP_RSD(((i_z6 + i_z1 + 32) >> 6) + (*pi2_rsd_ptr));
720         i4_nnz_H |= !!i_macro;
721         i_macro += *pu1_pred_ptr;
722         *pu1_out = CLIP_U8(i_macro);
723         pu1_pred_ptr += pred_strd;
724         pi2_rsd_ptr += rsd_strd;
725         pu1_out += out_strd;
726 
727         i_macro = CLIP_RSD(((i_z6 - i_z1 + 32) >> 6) + (*pi2_rsd_ptr));
728         i4_nnz_L |= !!i_macro;
729         i_macro += *pu1_pred_ptr;
730         *pu1_out = CLIP_U8(i_macro);
731         pu1_pred_ptr += pred_strd;
732         pi2_rsd_ptr += rsd_strd;
733         pu1_out += out_strd;
734 
735         i_macro = CLIP_RSD(((i_z4 - i_z3 + 32) >> 6) + (*pi2_rsd_ptr));
736         i4_nnz_L |= !!i_macro;
737         i_macro += *pu1_pred_ptr;
738         *pu1_out = CLIP_U8(i_macro);
739         pu1_pred_ptr += pred_strd;
740         pi2_rsd_ptr += rsd_strd;
741         pu1_out += out_strd;
742 
743         i_macro = CLIP_RSD(((i_z2 - i_z5 + 32) >> 6) + (*pi2_rsd_ptr));
744         i4_nnz_L |= !!i_macro;
745         i_macro += *pu1_pred_ptr;
746         *pu1_out = CLIP_U8(i_macro);
747         pu1_pred_ptr += pred_strd;
748         pi2_rsd_ptr += rsd_strd;
749         pu1_out += out_strd;
750 
751         i_macro = CLIP_RSD(((i_z0 - i_z7 + 32) >> 6) + (*pi2_rsd_ptr));
752         i4_nnz_L |= !!i_macro;
753         i_macro += *pu1_pred_ptr;
754         *pu1_out = CLIP_U8(i_macro);
755 
756         pi2_tmp_ptr++;
757         pu1_out_ptr++;
758         pi2_rsd++;
759         pu1_pred++;
760         if(i == 3)
761         {
762             i4_nnz = i4_nnz_H | (i4_nnz_L << 4);
763             i4_nnz_L = 0;
764             i4_nnz_H = 0;
765         }
766     }
767     i4_nnz |= (i4_nnz_H << 1) | (i4_nnz_L << 5);
768     return i4_nnz;
769 }
770 
771 /*****************************************************************************/
772 /*                                                                           */
773 /*  Function Name : isvcd_iquant_itrans_residual_recon_8x8_dc                 */
774 /*                                                                           */
775 /*  Description   : this function computes the recon output from the         */
776 /*                  IQ+IT+RESD                                               */
777 /*                                                                           */
778 /*  Inputs        :                                                          */
779 /*  Globals       : none                                                     */
780 /*  Processing    :                                                          */
781 /*                                                                           */
782 /*  Outputs       : i4_nnz                                                   */
783 /*  Returns       : none                                                     */
784 /*                                                                           */
785 /*  Issues        : none                                                     */
786 /*                                                                           */
787 /*  Revision History:                                                        */
788 /*                                                                           */
789 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
790 /*         25 11 2021   Kishore               creation                       */
791 /*                                                                           */
792 /*****************************************************************************/
793 
isvcd_iquant_itrans_residual_recon_8x8_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,WORD16 * pi2_rsd,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 rsd_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)794 WORD32 isvcd_iquant_itrans_residual_recon_8x8_dc(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
795                                                  UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
796                                                  WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
797                                                  const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
798                                                  WORD16 *pi2_tmp, WORD32 iq_start_idx,
799                                                  WORD16 *pi2_dc_ld_addr)
800 {
801     WORD32 i4_nnz = 0, i4_nnz_H = 0, i4_nnz_L = 0;
802     UWORD8 *pu1_pred_ptr = pu1_pred;
803     WORD16 *pi2_rsd_ptr = pi2_rsd;
804     UWORD8 *pu1_out_ptr = pu1_out;
805     WORD16 x, i, i_macro;
806     WORD32 q;
807     WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
808     UNUSED(pi2_tmp);
809     UNUSED(iq_start_idx);
810     UNUSED(pi2_dc_ld_addr);
811     /*************************************************************/
812     /* Dequantization of coefficients. Will be replaced by SIMD  */
813     /* operations on platform. Note : DC coeff is not scaled     */
814     /*************************************************************/
815     q = pi2_src[0];
816     INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
817     i_macro = (q + 32) >> 6;
818     /* Perform Inverse transform */
819     /*--------------------------------------------------------------------*/
820     /* IDCT [ Horizontal transformation ]                                 */
821     /*--------------------------------------------------------------------*/
822     /*--------------------------------------------------------------------*/
823     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
824     /*                                                                    */
825     /* Add the prediction and store it back to reconstructed frame buffer */
826     /* [Prediction buffer itself in this case]                            */
827     /*--------------------------------------------------------------------*/
828     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
829     {
830         pu1_pred_ptr = pu1_pred;
831         pi2_rsd_ptr = pi2_rsd;
832         pu1_out = pu1_out_ptr;
833 
834         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
835         i4_nnz_H |= !!x;
836         x += *pu1_pred_ptr;
837         *pu1_out = CLIP_U8(x);
838         /* Change uc_recBuffer to Point to next element in the same column*/
839         pu1_pred_ptr += pred_strd;
840         pi2_rsd_ptr += rsd_strd;
841         pu1_out += out_strd;
842 
843         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
844         i4_nnz_H |= !!x;
845         x += *pu1_pred_ptr;
846         *pu1_out = CLIP_U8(x);
847         pu1_pred_ptr += pred_strd;
848         pi2_rsd_ptr += rsd_strd;
849         pu1_out += out_strd;
850 
851         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
852         i4_nnz_H |= !!x;
853         x += *pu1_pred_ptr;
854         *pu1_out = CLIP_U8(x);
855         pu1_pred_ptr += pred_strd;
856         pi2_rsd_ptr += rsd_strd;
857         pu1_out += out_strd;
858 
859         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
860         i4_nnz_H |= !!x;
861         x += *pu1_pred_ptr;
862         *pu1_out = CLIP_U8(x);
863         pu1_pred_ptr += pred_strd;
864         pi2_rsd_ptr += rsd_strd;
865         pu1_out += out_strd;
866 
867         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
868         i4_nnz_L |= !!x;
869         x += *pu1_pred_ptr;
870         *pu1_out = CLIP_U8(x);
871         pu1_pred_ptr += pred_strd;
872         pi2_rsd_ptr += rsd_strd;
873         pu1_out += out_strd;
874 
875         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
876         i4_nnz_L |= !!x;
877         x += *pu1_pred_ptr;
878         *pu1_out = CLIP_U8(x);
879         pu1_pred_ptr += pred_strd;
880         pi2_rsd_ptr += rsd_strd;
881         pu1_out += out_strd;
882 
883         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
884         i4_nnz_L |= !!x;
885         x += *pu1_pred_ptr;
886         *pu1_out = CLIP_U8(x);
887         pu1_pred_ptr += pred_strd;
888         pi2_rsd_ptr += rsd_strd;
889         pu1_out += out_strd;
890 
891         x = CLIP_RSD(i_macro + (*pi2_rsd_ptr));
892         i4_nnz_L |= !!x;
893         x += *pu1_pred_ptr;
894         *pu1_out = CLIP_U8(x);
895 
896         pu1_out_ptr++;
897         pu1_pred++;
898         pi2_rsd++;
899         if(i == 3)
900         {
901             i4_nnz = i4_nnz_H | (i4_nnz_L << 4);
902             i4_nnz_L = 0;
903             i4_nnz_H = 0;
904         }
905     }
906     i4_nnz |= (i4_nnz_H << 1) | (i4_nnz_L << 5);
907     return i4_nnz;
908 }
909