xref: /aosp_15_r20/external/libavc/decoder/svc/isvcd_iquant_itrans_residual.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 /**
21  *******************************************************************************
22  * @file
23  *  isvcd_iquant_itrans_residual.c
24  *
25  * @brief
26  *  Contains definitions of functions for H.264 inverse quantization, inverse
27  *  transformation and residual computation
28  *
29  * @author
30  *  Kishore
31  *
32  *  @par List of Functions:
33  *  - isvcd_iquant_itrans_residual_4x4()
34  *  - isvcd_iquant_itrans_residual_8x8()
35  *  - isvcd_iquant_itrans_residual_4x4_dc()
36  *  - isvcd_iquant_itrans_residual_8x8_dc()
37  *  - isvcd_iquant_itrans_residual_chroma_4x4()
38  *  - isvcd_iquant_itrans_residual_chroma_4x4_dc()
39  *
40  * @remarks
41  *  None
42  *
43  *******************************************************************************
44  */
45 
46 /* User include files */
47 #include "ih264_typedefs.h"
48 #include "ih264_defs.h"
49 #include "ih264_trans_macros.h"
50 #include "ih264_macros.h"
51 #include "ih264_platform_macros.h"
52 #include "ih264_trans_data.h"
53 #include "ih264_size_defs.h"
54 #include "ih264_structs.h"
55 #include "isvcd_iquant_itrans_residual.h"
56 
57 /*****************************************************************************/
58 /*                                                                           */
59 /*  Function Name : isvcd_iquant_itrans_residual_4x4                          */
60 /*                                                                           */
61 /*  Description   : this function computes the resd output from the          */
62 /*                  IQ+IT                                                    */
63 /*                                                                           */
64 /*  Inputs        :                                                          */
65 /*  Globals       : none                                                     */
66 /*  Processing    :                                                          */
67 /*                                                                           */
68 /*  Outputs       : i4_nnz                                                   */
69 /*  Returns       : 1 if any output sample is non-zero, else 0               */
70 /*                                                                           */
71 /*  Issues        : none                                                     */
72 /*                                                                           */
73 /*  Revision History:                                                        */
74 /*                                                                           */
75 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
76 /*         25 11 2021   Kishore               creation                       */
77 /*                                                                           */
78 /*****************************************************************************/
79 
/**
 * Dequantises a 4x4 block of coefficients, runs the row pass and the column
 * pass of the 4x4 inverse transform, adds the co-located pi2_pred sample to
 * each rounded result and stores the CLIP_RSD-clipped sum in pi2_out.
 *
 * pi2_src        : 16 coefficients, consumed four per row
 * pi2_pred       : samples added to the transformed values; consecutive rows
 *                  are pred_strd elements apart
 * pi2_out        : destination; consecutive rows are out_strd elements apart
 * pu2_iscal_mat  : 16 scale entries handed to INV_QUANT
 * pu2_weigh_mat  : 16 weight entries handed to INV_QUANT
 * u4_qp_div_6    : handed to INV_QUANT; also selects the rounding term
 * pi2_tmp        : scratch area for the 16 row-pass results
 * iq_start_idx   : when equal to 1, coefficient 0 is replaced by
 *                  pi2_dc_ld_addr[0] after dequantisation
 * pi2_dc_ld_addr : source of the replacement value for coefficient 0
 *
 * Returns 1 if at least one value written to pi2_out is non-zero, else 0.
 */
WORD32 isvcd_iquant_itrans_residual_4x4(WORD16 *pi2_src, WORD16 *pi2_pred, WORD16 *pi2_out,
                                        WORD32 pred_strd, WORD32 out_strd,
                                        const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
                                        UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                        WORD16 *pi2_dc_ld_addr)
{
    WORD16 i2_round = 0;
    WORD32 i4_nnz = 0;
    WORD32 i4_row, i4_col, i4_k;

    /* A rounding term is only used for the smaller values of u4_qp_div_6 */
    if(u4_qp_div_6 < 4)
    {
        i2_round = 1 << (3 - u4_qp_div_6);
    }

    /* Row pass: dequantise one row of coefficients and transform it */
    for(i4_row = 0; i4_row < SUB_BLK_WIDTH_4x4; i4_row++)
    {
        const WORD32 i4_base = i4_row * SUB_BLK_WIDTH_4x4;
        const WORD16 *pi2_coef_row = pi2_src + i4_base;
        const UWORD16 *pu2_scale_row = pu2_iscal_mat + i4_base;
        const UWORD16 *pu2_weight_row = pu2_weigh_mat + i4_base;
        WORD16 *pi2_tmp_row = pi2_tmp + i4_base;
        WORD32 ai4_deq[4];
        WORD16 i2_even_sum, i2_even_diff, i2_odd_diff, i2_odd_sum;

        for(i4_k = 0; i4_k < 4; i4_k++)
        {
            WORD32 i4_level = pi2_coef_row[i4_k];
            INV_QUANT(i4_level, pu2_scale_row[i4_k], pu2_weight_row[i4_k], u4_qp_div_6, i2_round,
                      4);
            ai4_deq[i4_k] = i4_level;
        }

        /* The very first coefficient may come from the separately kept DC value */
        if((0 == i4_row) && (1 == iq_start_idx))
        {
            ai4_deq[0] = pi2_dc_ld_addr[0];
        }

        /* Intermediate terms are deliberately held in 16 bits */
        i2_even_sum = ai4_deq[0] + ai4_deq[2];
        i2_even_diff = ai4_deq[0] - ai4_deq[2];
        i2_odd_diff = (ai4_deq[1] >> 1) - ai4_deq[3];
        i2_odd_sum = ai4_deq[1] + (ai4_deq[3] >> 1);

        pi2_tmp_row[0] = i2_even_sum + i2_odd_sum;
        pi2_tmp_row[1] = i2_even_diff + i2_odd_diff;
        pi2_tmp_row[2] = i2_even_diff - i2_odd_diff;
        pi2_tmp_row[3] = i2_even_sum - i2_odd_sum;
    }

    /* Column pass: transform one column, round, add pi2_pred and store */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        const WORD16 *pi2_tmp_col = pi2_tmp + i4_col;
        WORD16 *pi2_pred_col = pi2_pred + i4_col;
        WORD16 *pi2_out_col = pi2_out + i4_col;
        WORD16 ai2_col[4];
        WORD16 i2_even_sum = (pi2_tmp_col[0] + pi2_tmp_col[8]);
        WORD16 i2_even_diff = (pi2_tmp_col[0] - pi2_tmp_col[8]);
        WORD16 i2_odd_diff = (pi2_tmp_col[4] >> 1) - pi2_tmp_col[12];
        WORD16 i2_odd_sum = pi2_tmp_col[4] + (pi2_tmp_col[12] >> 1);

        ai2_col[0] = i2_even_sum + i2_odd_sum;
        ai2_col[1] = i2_even_diff + i2_odd_diff;
        ai2_col[2] = i2_even_diff - i2_odd_diff;
        ai2_col[3] = i2_even_sum - i2_odd_sum;

        for(i4_row = 0; i4_row < 4; i4_row++)
        {
            WORD16 i2_resd = ai2_col[i4_row];

            i2_resd = ((i2_resd + 32) >> 6);
            i2_resd = CLIP_RSD(i2_resd + (pi2_pred_col[i4_row * pred_strd]));
            i4_nnz |= !!i2_resd;
            pi2_out_col[i4_row * out_strd] = i2_resd;
        }
    }

    return i4_nnz;
}
180 /*****************************************************************************/
181 /*                                                                           */
182 /*  Function Name : isvcd_iquant_itrans_residual_4x4_dc                       */
183 /*                                                                           */
184 /*  Description   : this function computes the resd output from the          */
185 /*                  IQ+IT                                                    */
186 /*                                                                           */
187 /*  Inputs        :                                                          */
188 /*  Globals       : none                                                     */
189 /*  Processing    :                                                          */
190 /*                                                                           */
191 /*  Outputs       : i4_nnz                                                   */
192 /*  Returns       : 1 if any output sample is non-zero, else 0               */
193 /*                                                                           */
194 /*  Issues        : none                                                     */
195 /*                                                                           */
196 /*  Revision History:                                                        */
197 /*                                                                           */
198 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
199 /*         25 11 2021   Kishore               creation                       */
200 /*                                                                           */
201 /*****************************************************************************/
202 
/**
 * DC-only variant of the 4x4 dequantise/transform/add routine: one rounded
 * value derived from the first coefficient is added to every pi2_pred sample
 * of the 4x4 block, and the CLIP_RSD-clipped sums are stored in pi2_out.
 *
 * pi2_src        : coefficients; only pi2_src[0] is read, and only when
 *                  iq_start_idx is 0
 * pi2_pred       : samples the DC value is added to; consecutive rows are
 *                  pred_strd elements apart
 * pi2_out        : destination; consecutive rows are out_strd elements apart
 * pu2_iscal_mat  : entry 0 is handed to INV_QUANT
 * pu2_weigh_mat  : entry 0 is handed to INV_QUANT
 * u4_qp_div_6    : handed to INV_QUANT; also selects the rounding term
 * pi2_tmp        : not used
 * iq_start_idx   : 0 selects the dequantised pi2_src[0]; any other value
 *                  selects pi2_dc_ld_addr[0] as is
 * pi2_dc_ld_addr : source of the DC value when iq_start_idx is non-zero
 *
 * Returns 1 if at least one value written to pi2_out is non-zero, else 0.
 */
WORD32 isvcd_iquant_itrans_residual_4x4_dc(WORD16 *pi2_src, WORD16 *pi2_pred, WORD16 *pi2_out,
                                           WORD32 pred_strd, WORD32 out_strd,
                                           const UWORD16 *pu2_iscal_mat,
                                           const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
                                           WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                           WORD16 *pi2_dc_ld_addr)
{
    WORD16 i2_round = 0;
    WORD16 i2_dc_resd;
    WORD32 i4_dc;
    WORD32 i4_nnz = 0;
    WORD32 i4_row, i4_col;

    UNUSED(pi2_tmp);

    if(u4_qp_div_6 < 4)
    {
        i2_round = 1 << (3 - u4_qp_div_6);
    }

    if(0 != iq_start_idx)
    {
        /* DC value was kept aside by the caller; use it without dequantising */
        i4_dc = pi2_dc_ld_addr[0];
    }
    else
    {
        i4_dc = pi2_src[0];
        INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, i2_round, 4);
    }

    /* The same rounded value applies to all 16 positions */
    i2_dc_resd = ((i4_dc + 32) >> 6);

    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        for(i4_row = 0; i4_row < 4; i4_row++)
        {
            const WORD16 *pi2_ref = pi2_pred + i4_col + i4_row * pred_strd;
            WORD16 *pi2_dst = pi2_out + i4_col + i4_row * out_strd;

            *pi2_dst = CLIP_RSD(i2_dc_resd + (*pi2_ref));
            i4_nnz |= !!(*pi2_dst);
        }
    }

    return i4_nnz;
}
257 /*****************************************************************************/
258 /*                                                                           */
259 /*  Function Name : isvcd_iquant_itrans_residual_8x8                          */
260 /*                                                                           */
261 /*  Description   : this function computes the resd output from the          */
262 /*                  IQ+IT                                                    */
263 /*                                                                           */
264 /*  Inputs        :                                                          */
265 /*  Globals       : none                                                     */
266 /*  Processing    :                                                          */
267 /*                                                                           */
268 /*  Outputs       : i4_nnz                                                   */
269 /*  Returns       : none                                                     */
270 /*                                                                           */
271 /*  Issues        : none                                                     */
272 /*                                                                           */
273 /*  Revision History:                                                        */
274 /*                                                                           */
275 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
276 /*         25 11 2021   Kishore               creation                       */
277 /*                                                                           */
278 /*****************************************************************************/
279 
/*
 * Computes the y and z stages of the 8-point inverse transform for the eight
 * values pi2_w[0], pi2_w[i4_step], ..., pi2_w[7 * i4_step] and writes
 * z0..z7 to pi2_z[0..7]. Every stage result is held in 16 bits.
 *
 *   y0 = w0 + w4                      z0 = y0 + y6
 *   y1 = -w3 + w5 - w7 - (w7 >> 1)    z1 = y1 + (y7 >> 2)
 *   y2 = w0 - w4                      z2 = y2 + y4
 *   y3 = w1 + w7 - w3 - (w3 >> 1)     z3 = y3 + (y5 >> 2)
 *   y4 = (w2 >> 1) - w6               z4 = y2 - y4
 *   y5 = -w1 + w7 + w5 + (w5 >> 1)    z5 = (y3 >> 2) - y5
 *   y6 = w2 + (w6 >> 1)               z6 = y0 - y6
 *   y7 = w3 + w5 + w1 + (w1 >> 1)     z7 = y7 - (y1 >> 2)
 */
static void isvcd_itrans_8pt_yz_stages(const WORD16 *pi2_w, WORD32 i4_step, WORD16 *pi2_z)
{
    const WORD16 i2_w0 = pi2_w[0];
    const WORD16 i2_w1 = pi2_w[i4_step];
    const WORD16 i2_w2 = pi2_w[2 * i4_step];
    const WORD16 i2_w3 = pi2_w[3 * i4_step];
    const WORD16 i2_w4 = pi2_w[4 * i4_step];
    const WORD16 i2_w5 = pi2_w[5 * i4_step];
    const WORD16 i2_w6 = pi2_w[6 * i4_step];
    const WORD16 i2_w7 = pi2_w[7 * i4_step];

    const WORD16 i2_y0 = (i2_w0 + i2_w4);
    const WORD16 i2_y1 = ((WORD32) (-i2_w3) + i2_w5 - i2_w7 - (i2_w7 >> 1));
    const WORD16 i2_y2 = (i2_w0 - i2_w4);
    const WORD16 i2_y3 = ((WORD32) i2_w1 + i2_w7 - i2_w3 - (i2_w3 >> 1));
    const WORD16 i2_y4 = ((i2_w2 >> 1) - i2_w6);
    const WORD16 i2_y5 = ((WORD32) (-i2_w1) + i2_w7 + i2_w5 + (i2_w5 >> 1));
    const WORD16 i2_y6 = (i2_w2 + (i2_w6 >> 1));
    const WORD16 i2_y7 = ((WORD32) i2_w3 + i2_w5 + i2_w1 + (i2_w1 >> 1));

    pi2_z[0] = i2_y0 + i2_y6;
    pi2_z[1] = i2_y1 + (i2_y7 >> 2);
    pi2_z[2] = i2_y2 + i2_y4;
    pi2_z[3] = i2_y3 + (i2_y5 >> 2);
    pi2_z[4] = i2_y2 - i2_y4;
    pi2_z[5] = (i2_y3 >> 2) - i2_y5;
    pi2_z[6] = i2_y0 - i2_y6;
    pi2_z[7] = i2_y7 - (i2_y1 >> 2);
}

/**
 * Dequantises an 8x8 block of coefficients, runs the row pass and the column
 * pass of the 8x8 inverse transform, adds the co-located pi2_pred sample to
 * each rounded result and stores the CLIP_RSD-clipped sum in pi2_out.
 *
 * pi2_src        : 64 coefficients
 * pi2_pred       : samples added to the transformed values; consecutive rows
 *                  are pred_strd elements apart
 * pi2_out        : destination; consecutive rows are out_strd elements apart
 * pu2_iscale_mat : 64 scale entries handed to INV_QUANT
 * pu2_weigh_mat  : 64 weight entries handed to INV_QUANT
 * qp_div         : handed to INV_QUANT; also selects the rounding term
 * pi2_tmp        : scratch area for 64 intermediate values
 * iq_start_idx   : not used
 * pi2_dc_ld_addr : not used
 *
 * Returns a mask with one flag per 4x4 quadrant of the output, set when that
 * quadrant holds a non-zero value: bit 0 = columns 0-3 / rows 0-3,
 * bit 1 = columns 4-7 / rows 0-3, bit 4 = columns 0-3 / rows 4-7,
 * bit 5 = columns 4-7 / rows 4-7.
 */
WORD32 isvcd_iquant_itrans_residual_8x8(WORD16 *pi2_src, WORD16 *pi2_pred, WORD16 *pi2_out,
                                        WORD32 pred_strd, WORD32 out_strd,
                                        const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat,
                                        UWORD32 qp_div, WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                        WORD16 *pi2_dc_ld_addr)
{
    /* Non-zero flags, indexed [column half][row half] */
    WORD32 ai4_quad_nnz[2][2] = {{0, 0}, {0, 0}};
    WORD16 ai2_z[8];
    WORD32 ai4_x[8];
    WORD32 i4_idx, i4_row, i4_col;
    WORD32 i4_level;
    WORD32 i4_round = 0;

    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    if(qp_div < 6)
    {
        i4_round = (1 << (5 - qp_div));
    }

    /* Dequantise all 64 coefficients (index 0 included) into the scratch area */
    for(i4_idx = 0; i4_idx < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i4_idx++)
    {
        i4_level = pi2_src[i4_idx];
        INV_QUANT(i4_level, pu2_iscale_mat[i4_idx], pu2_weigh_mat[i4_idx], qp_div, i4_round, 6);
        pi2_tmp[i4_idx] = i4_level;
    }

    /* Row pass: each scratch row is transformed in place */
    for(i4_row = 0; i4_row < SUB_BLK_WIDTH_8x8; i4_row++)
    {
        WORD16 *pi2_tmp_row = pi2_tmp + i4_row * SUB_BLK_WIDTH_8x8;

        isvcd_itrans_8pt_yz_stages(pi2_tmp_row, 1, ai2_z);

        pi2_tmp_row[0] = ai2_z[0] + ai2_z[7];
        pi2_tmp_row[1] = ai2_z[2] + ai2_z[5];
        pi2_tmp_row[2] = ai2_z[4] + ai2_z[3];
        pi2_tmp_row[3] = ai2_z[6] + ai2_z[1];
        pi2_tmp_row[4] = ai2_z[6] - ai2_z[1];
        pi2_tmp_row[5] = ai2_z[4] - ai2_z[3];
        pi2_tmp_row[6] = ai2_z[2] - ai2_z[5];
        pi2_tmp_row[7] = ai2_z[0] - ai2_z[7];
    }

    /* Column pass: transform, apply (x + 32) >> 6, add pi2_pred and store */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_8x8; i4_col++)
    {
        const WORD32 i4_col_half = (i4_col >= 4);

        isvcd_itrans_8pt_yz_stages(pi2_tmp + i4_col, 8, ai2_z);

        /* Final-stage sums stay in 32 bits until after the rounding shift */
        ai4_x[0] = ai2_z[0] + ai2_z[7];
        ai4_x[1] = ai2_z[2] + ai2_z[5];
        ai4_x[2] = ai2_z[4] + ai2_z[3];
        ai4_x[3] = ai2_z[6] + ai2_z[1];
        ai4_x[4] = ai2_z[6] - ai2_z[1];
        ai4_x[5] = ai2_z[4] - ai2_z[3];
        ai4_x[6] = ai2_z[2] - ai2_z[5];
        ai4_x[7] = ai2_z[0] - ai2_z[7];

        for(i4_row = 0; i4_row < 8; i4_row++)
        {
            const WORD16 *pi2_ref = pi2_pred + i4_col + i4_row * pred_strd;
            WORD16 *pi2_dst = pi2_out + i4_col + i4_row * out_strd;

            *pi2_dst = CLIP_RSD(((ai4_x[i4_row] + 32) >> 6) + (*pi2_ref));
            ai4_quad_nnz[i4_col_half][i4_row >= 4] |= !!(*pi2_dst);
        }
    }

    return ai4_quad_nnz[0][0] | (ai4_quad_nnz[0][1] << 4) | (ai4_quad_nnz[1][0] << 1) |
           (ai4_quad_nnz[1][1] << 5);
}
498 /*****************************************************************************/
499 /*                                                                           */
500 /*  Function Name : isvcd_iquant_itrans_residual_8x8_dc                       */
501 /*                                                                           */
502 /*  Description   : this function computes the resd output from the          */
503 /*                  IQ+IT                                                    */
504 /*                                                                           */
505 /*  Inputs        :                                                          */
506 /*  Globals       : none                                                     */
507 /*  Processing    :                                                          */
508 /*                                                                           */
509 /*  Outputs       : i4_nnz                                                   */
510 /*  Returns       : nnz flags of the four 4x4 quadrants (bits 0, 1, 4, 5)    */
511 /*                                                                           */
512 /*  Issues        : none                                                     */
513 /*                                                                           */
514 /*  Revision History:                                                        */
515 /*                                                                           */
516 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
517 /*         25 11 2021   Kishore               creation                       */
518 /*                                                                           */
519 /*****************************************************************************/
520 
/**
 * DC-only variant of the 8x8 dequantise/transform/add routine: the first
 * coefficient is dequantised and rounded with (q + 32) >> 6, that single
 * value is added to every pi2_pred sample of the 8x8 block, and the
 * CLIP_RSD-clipped sums are stored in pi2_out.
 *
 * pi2_src        : coefficients; only pi2_src[0] is read
 * pi2_pred       : samples the DC value is added to; consecutive rows are
 *                  pred_strd elements apart
 * pi2_out        : destination; consecutive rows are out_strd elements apart
 * pu2_iscale_mat : entry 0 is handed to INV_QUANT
 * pu2_weigh_mat  : entry 0 is handed to INV_QUANT
 * qp_div         : handed to INV_QUANT; also selects the rounding term
 * pi2_tmp        : not used
 * iq_start_idx   : not used
 * pi2_dc_ld_addr : not used
 *
 * Returns a mask with one flag per 4x4 quadrant of the output, set when that
 * quadrant holds a non-zero value: bit 0 = columns 0-3 / rows 0-3,
 * bit 1 = columns 4-7 / rows 0-3, bit 4 = columns 0-3 / rows 4-7,
 * bit 5 = columns 4-7 / rows 4-7.
 */
WORD32 isvcd_iquant_itrans_residual_8x8_dc(WORD16 *pi2_src, WORD16 *pi2_pred, WORD16 *pi2_out,
                                           WORD32 pred_strd, WORD32 out_strd,
                                           const UWORD16 *pu2_iscale_mat,
                                           const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
                                           WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                           WORD16 *pi2_dc_ld_addr)
{
    /* Non-zero flags, indexed [column half][row half] */
    WORD32 ai4_quad_nnz[2][2] = {{0, 0}, {0, 0}};
    WORD32 i4_row, i4_col;
    WORD32 i4_dc;
    WORD32 i4_round = 0;
    WORD16 i2_dc_resd;

    UNUSED(pi2_tmp);
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    if(qp_div < 6)
    {
        i4_round = (1 << (5 - qp_div));
    }

    /* Only the first coefficient contributes in this variant */
    i4_dc = pi2_src[0];
    INV_QUANT(i4_dc, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, i4_round, 6);
    i2_dc_resd = (i4_dc + 32) >> 6;

    /* Add the same value at all 64 positions, walking column by column */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_8x8; i4_col++)
    {
        const WORD32 i4_col_half = (i4_col >= 4);

        for(i4_row = 0; i4_row < 8; i4_row++)
        {
            const WORD16 *pi2_ref = pi2_pred + i4_col + i4_row * pred_strd;
            WORD16 *pi2_dst = pi2_out + i4_col + i4_row * out_strd;

            *pi2_dst = CLIP_RSD(i2_dc_resd + (*pi2_ref));
            ai4_quad_nnz[i4_col_half][i4_row >= 4] |= !!(*pi2_dst);
        }
    }

    return ai4_quad_nnz[0][0] | (ai4_quad_nnz[0][1] << 4) | (ai4_quad_nnz[1][0] << 1) |
           (ai4_quad_nnz[1][1] << 5);
}
610 /*****************************************************************************/
611 /*                                                                           */
612 /*  Function Name : isvcd_iquant_itrans_residual_chroma_4x4                   */
613 /*                                                                           */
614 /*  Description   : this function computes the resd output from the          */
615 /*                  IQ+IT                                                    */
616 /*                                                                           */
617 /*  Inputs        :                                                          */
618 /*  Globals       : none                                                     */
619 /*  Processing    :                                                          */
620 /*                                                                           */
621 /*  Outputs       : i4_nnz                                                   */
622 /*  Returns       : none                                                     */
623 /*                                                                           */
624 /*  Issues        : none                                                     */
625 /*                                                                           */
626 /*  Revision History:                                                        */
627 /*                                                                           */
628 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
629 /*         25 11 2021   Kishore               creation                       */
630 /*                                                                           */
631 /*****************************************************************************/
632 
isvcd_iquant_itrans_residual_chroma_4x4(WORD16 * pi2_src,WORD16 * pi2_pred,WORD16 * pi2_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)633 WORD32 isvcd_iquant_itrans_residual_chroma_4x4(WORD16 *pi2_src, WORD16 *pi2_pred, WORD16 *pi2_out,
634                                                WORD32 pred_strd, WORD32 out_strd,
635                                                const UWORD16 *pu2_iscal_mat,
636                                                const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
637                                                WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
638 {
639     WORD32 i4_nnz = 0;
640     WORD16 *pi2_src_ptr = pi2_src;
641     WORD16 *pi2_tmp_ptr = pi2_tmp;
642     WORD16 *pi2_pred_ptr = pi2_pred;
643     WORD16 *pi2_out_ptr = pi2_out;
644     WORD16 x0, x1, x2, x3, i;
645     WORD32 q0, q1, q2, q3;
646     WORD16 i_macro;
647     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
648 
649     /* inverse quant */
650     /*horizontal inverse transform */
651     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
652     {
653         if(i == 0)
654         {
655             q0 = pi2_dc_src[0];
656         }
657         else
658         {
659             q0 = pi2_src_ptr[0];
660             INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
661         }
662 
663         q2 = pi2_src_ptr[2];
664         INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
665 
666         x0 = q0 + q2;
667         x1 = q0 - q2;
668 
669         q1 = pi2_src_ptr[1];
670         INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
671 
672         q3 = pi2_src_ptr[3];
673         INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
674 
675         x2 = (q1 >> 1) - q3;
676         x3 = q1 + (q3 >> 1);
677 
678         pi2_tmp_ptr[0] = x0 + x3;
679         pi2_tmp_ptr[1] = x1 + x2;
680         pi2_tmp_ptr[2] = x1 - x2;
681         pi2_tmp_ptr[3] = x0 - x3;
682 
683         pi2_src_ptr += SUB_BLK_WIDTH_4x4;
684         pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
685         pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
686         pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
687     }
688 
689     /* vertical inverse transform */
690     pi2_tmp_ptr = pi2_tmp;
691     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
692     {
693         pi2_pred_ptr = pi2_pred;
694         pi2_out = pi2_out_ptr;
695 
696         x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
697         x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
698         x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
699         x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
700 
701         /* inverse prediction */
702         i_macro = x0 + x3;
703         i_macro = ((i_macro + 32) >> 6);
704         i_macro = CLIP_RSD(i_macro + (*pi2_pred_ptr));
705         i4_nnz |= !!i_macro;
706         *pi2_out = i_macro;
707         pi2_pred_ptr += pred_strd;
708         pi2_out += out_strd;
709 
710         i_macro = x1 + x2;
711         i_macro = ((i_macro + 32) >> 6);
712         i_macro = CLIP_RSD(i_macro + (*pi2_pred_ptr));
713         i4_nnz |= !!i_macro;
714         *pi2_out = i_macro;
715         pi2_pred_ptr += pred_strd;
716         pi2_out += out_strd;
717 
718         i_macro = x1 - x2;
719         i_macro = ((i_macro + 32) >> 6);
720         i_macro = CLIP_RSD(i_macro + (*pi2_pred_ptr));
721         i4_nnz |= !!i_macro;
722         *pi2_out = i_macro;
723         pi2_pred_ptr += pred_strd;
724         pi2_out += out_strd;
725 
726         i_macro = x0 - x3;
727         i_macro = ((i_macro + 32) >> 6);
728         i_macro = CLIP_RSD(i_macro + (*pi2_pred_ptr));
729         i4_nnz |= !!i_macro;
730         *pi2_out = i_macro;
731 
732         pi2_tmp_ptr++;
733         pi2_out_ptr += 2;  // Interleaved store for output
734         pi2_pred += 2;     // Interleaved load for pred buffer
735     }
736     return i4_nnz;
737 }
738 /*****************************************************************************/
739 /*                                                                           */
740 /*  Function Name : isvcd_iquant_itrans_residual_chroma_4x4_dc                */
741 /*                                                                           */
742 /*  Description   : this function computes the resd output from the          */
743 /*                  IQ+IT                                                    */
744 /*                                                                           */
745 /*  Inputs        :                                                          */
746 /*  Globals       : none                                                     */
747 /*  Processing    :                                                          */
748 /*                                                                           */
749 /*  Outputs       : i4_nnz                                                   */
750 /*  Returns       : none                                                     */
751 /*                                                                           */
752 /*  Issues        : none                                                     */
753 /*                                                                           */
754 /*  Revision History:                                                        */
755 /*                                                                           */
756 /*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
757 /*         25 11 2021   Kishore               creation                       */
758 /*                                                                           */
759 /*****************************************************************************/
760 
isvcd_iquant_itrans_residual_chroma_4x4_dc(WORD16 * pi2_src,WORD16 * pi2_pred,WORD16 * pi2_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)761 WORD32 isvcd_iquant_itrans_residual_chroma_4x4_dc(WORD16 *pi2_src, WORD16 *pi2_pred,
762                                                   WORD16 *pi2_out, WORD32 pred_strd,
763                                                   WORD32 out_strd, const UWORD16 *pu2_iscal_mat,
764                                                   const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
765                                                   WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
766 {
767     WORD32 i4_nnz = 0;
768     WORD16 *pi2_pred_ptr = pi2_pred;
769     WORD16 *pi2_out_ptr = pi2_out;
770     WORD32 q0;
771     WORD16 i_macro, i;
772     UNUSED(pi2_src);
773     UNUSED(pu2_iscal_mat);
774     UNUSED(pu2_weigh_mat);
775     UNUSED(u4_qp_div_6);
776     UNUSED(pi2_tmp);
777 
778     q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
779     i_macro = ((q0 + 32) >> 6);
780 
781     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
782     {
783         pi2_pred_ptr = pi2_pred;
784         pi2_out = pi2_out_ptr;
785 
786         /* inverse prediction */
787         *pi2_out = CLIP_RSD(i_macro + (*pi2_pred_ptr));
788         i4_nnz |= !!(*pi2_out);
789         pi2_pred_ptr += pred_strd;
790         pi2_out += out_strd;
791 
792         *pi2_out = CLIP_RSD(i_macro + (*pi2_pred_ptr));
793         i4_nnz |= !!(*pi2_out);
794         pi2_pred_ptr += pred_strd;
795         pi2_out += out_strd;
796 
797         *pi2_out = CLIP_RSD(i_macro + (*pi2_pred_ptr));
798         i4_nnz |= !!(*pi2_out);
799         pi2_pred_ptr += pred_strd;
800         pi2_out += out_strd;
801 
802         *pi2_out = CLIP_RSD(i_macro + (*pi2_pred_ptr));
803         i4_nnz |= !!(*pi2_out);
804 
805         pi2_out_ptr += 2;
806         pi2_pred += 2;
807     }
808     return i4_nnz;
809 }
810