1 /******************************************************************************
2 *
3 * Copyright (C) 2022 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * isvcd_iquant_itrans_residual_recon.c
24 *
25 * @brief
26 * Contains definition of functions for h264 inverse quantization inverse
27 *transformation and recon
28 *
29 * @author
30 * Kishore
31 *
32 * @par List of Functions:
33 * - isvcd_iquant_itrans_residual_recon_4x4()
34 * - isvcd_iquant_itrans_residual_recon_8x8()
35 * - isvcd_iquant_itrans_residual_recon_4x4_dc()
36 * - isvcd_iquant_itrans_residual_recon_8x8_dc()
37 * - isvcd_iquant_itrans_residual_recon_chroma_4x4()
38 * - isvcd_iquant_itrans_residual_recon_chroma_4x4_dc()
39 *
40 * @remarks
41 * None
42 *
43 *******************************************************************************
44 */
45
46 /* User include files */
47 #include "ih264_typedefs.h"
48 #include "ih264_defs.h"
49 #include "ih264_trans_macros.h"
50 #include "ih264_macros.h"
51 #include "ih264_platform_macros.h"
52 #include "ih264_trans_data.h"
53 #include "ih264_size_defs.h"
54 #include "ih264_structs.h"
55 #include "isvcd_iquant_itrans_residual_recon.h"
56
57 /*****************************************************************************/
58 /* */
59 /* Function Name : isvcd_iquant_itrans_residual_recon_4x4 */
60 /* */
61 /* Description : this function computes the recon output from the */
62 /* IQ+IT+RESD */
63 /* */
64 /* Inputs : */
65 /* Globals : none */
66 /* Processing : */
67 /* */
68 /* Outputs : reconstructed samples written to pu1_out */
69 /* Returns : i4_nnz (flag: some clipped residual sample is non-zero) */
70 /* */
71 /* Issues : none */
72 /* */
73 /* Revision History: */
74 /* */
75 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
76 /* 25 11 2021 Kishore creation */
77 /* */
78 /*****************************************************************************/
79
/**
 * Reconstructs one 4x4 block: inverse quantisation of the 16 coefficients,
 * a row pass and a column pass of the 4-point inverse transform, addition of
 * the stored residual and of the prediction, and storage of the clipped
 * result.
 *
 * @param pi2_src         16 coefficients, read as four consecutive rows of four.
 * @param pu1_pred        Prediction samples; consecutive rows are pred_strd apart.
 * @param pi2_rsd         Residual samples added to the transform output;
 *                        consecutive rows are rsd_strd apart.
 * @param pu1_out         Destination samples; consecutive rows are out_strd apart.
 * @param pred_strd       Row stride of pu1_pred, in elements.
 * @param rsd_strd        Row stride of pi2_rsd, in elements.
 * @param out_strd        Row stride of pu1_out, in elements.
 * @param pu2_iscal_mat   16 scale entries handed to INV_QUANT, same order as pi2_src.
 * @param pu2_weigh_mat   16 weight entries handed to INV_QUANT, same order as pi2_src.
 * @param u4_qp_div_6     Handed to INV_QUANT; also selects the rounding factor
 *                        (1 << (3 - u4_qp_div_6) when below 4, otherwise 0).
 * @param pi2_tmp         Scratch buffer for the 16 intermediate WORD16 values.
 * @param iq_start_idx    When equal to 1, coefficient 0 is taken from
 *                        pi2_dc_ld_addr[0] instead of the dequantised pi2_src[0].
 * @param pi2_dc_ld_addr  Read only when iq_start_idx == 1.
 *
 * @return 1 when at least one sample of (transform output + residual), after
 *         CLIP_RSD, is non-zero; 0 otherwise.
 *
 * NOTE(review): INV_QUANT, CLIP_RSD and CLIP_U8 are defined outside this file;
 * they are presumably the dequantiser, a clamp to the residual range and a
 * clamp to 0..255 - confirm against their headers.
 */
WORD32 isvcd_iquant_itrans_residual_recon_4x4(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
                                              UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
                                              WORD32 out_strd, const UWORD16 *pu2_iscal_mat,
                                              const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
                                              WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                              WORD16 *pi2_dc_ld_addr)
{
    WORD32 i4_nnz = 0;
    WORD16 *pi2_src_ptr = pi2_src;
    WORD16 *pi2_tmp_ptr = pi2_tmp;
    WORD16 x0, x1, x2, x3, i, j;
    WORD32 q0, q1, q2, q3;
    WORD16 i_macro;
    /* The four transform outputs of the column being processed, top to bottom */
    WORD16 ai2_col[SUB_BLK_WIDTH_4x4];
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;

    /* Inverse quantisation fused with the row (horizontal) pass */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        q0 = pi2_src_ptr[0];
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
        if(i == 0 && iq_start_idx == 1)
        {
            /* DC value supplied by the caller replaces the dequantised one */
            q0 = pi2_dc_ld_addr[0];
        }

        q2 = pi2_src_ptr[2];
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);

        x0 = q0 + q2;
        x1 = q0 - q2;

        q1 = pi2_src_ptr[1];
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);

        q3 = pi2_src_ptr[3];
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);

        x2 = (q1 >> 1) - q3;
        x3 = q1 + (q3 >> 1);

        pi2_tmp_ptr[0] = x0 + x3;
        pi2_tmp_ptr[1] = x1 + x2;
        pi2_tmp_ptr[2] = x1 - x2;
        pi2_tmp_ptr[3] = x0 - x3;

        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
    }

    /* Column (vertical) pass, one output column per iteration */
    pi2_tmp_ptr = pi2_tmp;
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);

        ai2_col[0] = x0 + x3;
        ai2_col[1] = x1 + x2;
        ai2_col[2] = x1 - x2;
        ai2_col[3] = x0 - x3;

        /* All four rows share one code path, so the non-zero flag is always */
        /* taken from the value after the residual is added and clipped.     */
        /* (The last row used to test the value before that step.)           */
        for(j = 0; j < SUB_BLK_WIDTH_4x4; j++)
        {
            i_macro = ((ai2_col[j] + 32) >> 6);
            i_macro = CLIP_RSD(i_macro + pi2_rsd[j * rsd_strd]);
            i4_nnz |= !!i_macro;
            i_macro += pu1_pred[j * pred_strd];
            pu1_out[j * out_strd] = CLIP_U8(i_macro);
        }

        /* Step to the next column in every buffer */
        pi2_tmp_ptr++;
        pu1_out++;
        pi2_rsd++;
        pu1_pred++;
    }
    return i4_nnz;
}
191
192 /*****************************************************************************/
193 /* */
194 /* Function Name : isvcd_iquant_itrans_residual_recon_4x4_dc */
195 /* */
196 /* Description : this function computes the recon output from the */
197 /* IQ+IT+RESD */
198 /* */
199 /* Inputs : */
200 /* Globals : none */
201 /* Processing : */
202 /* */
203 /* Outputs : reconstructed samples written to pu1_out */
204 /* Returns : i4_nnz (flag: some clipped residual sample is non-zero) */
205 /* */
206 /* Issues : none */
207 /* */
208 /* Revision History: */
209 /* */
210 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
211 /* 25 11 2021 Kishore creation */
212 /* */
213 /*****************************************************************************/
214
/**
 * DC-only variant of the 4x4 reconstruction: a single value derived from the
 * DC coefficient is added to every residual sample of the block, the sum is
 * clipped, the prediction is added and the clipped result is stored.
 *
 * @param pi2_src         Coefficients; only pi2_src[0] is read, and only when
 *                        iq_start_idx == 0.
 * @param pu1_pred        Prediction samples; rows are pred_strd apart.
 * @param pi2_rsd         Residual samples; rows are rsd_strd apart.
 * @param pu1_out         Destination samples; rows are out_strd apart.
 * @param pred_strd       Row stride of pu1_pred, in elements.
 * @param rsd_strd        Row stride of pi2_rsd, in elements.
 * @param out_strd        Row stride of pu1_out, in elements.
 * @param pu2_iscal_mat   Only entry 0 is handed to INV_QUANT.
 * @param pu2_weigh_mat   Only entry 0 is handed to INV_QUANT.
 * @param u4_qp_div_6     Handed to INV_QUANT; also selects the rounding factor
 *                        (1 << (3 - u4_qp_div_6) when below 4, otherwise 0).
 * @param pi2_tmp         Unused.
 * @param iq_start_idx    0: dequantise pi2_src[0]; any other value: take the
 *                        DC value from pi2_dc_ld_addr[0] as is.
 * @param pi2_dc_ld_addr  Read only when iq_start_idx != 0.
 *
 * @return 1 when at least one clipped (DC term + residual) sample is non-zero,
 *         0 otherwise.
 */
WORD32 isvcd_iquant_itrans_residual_recon_4x4_dc(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
                                                 UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
                                                 WORD32 out_strd, const UWORD16 *pu2_iscal_mat,
                                                 const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
                                                 WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                                 WORD16 *pi2_dc_ld_addr)
{
    WORD32 i4_any_nonzero = 0;
    WORD32 i4_col, i4_row;
    WORD32 i4_dc;
    WORD16 i2_dc_term, i2_sample;
    WORD16 i2_rnd = 0;
    UNUSED(pi2_tmp);

    if(u4_qp_div_6 < 4)
    {
        i2_rnd = 1 << (3 - u4_qp_div_6);
    }

    if(iq_start_idx != 0)
    {
        /* DC value supplied by the caller, used without dequantisation */
        i4_dc = pi2_dc_ld_addr[0];
    }
    else
    {
        i4_dc = pi2_src[0];
        INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, i2_rnd, 4);
    }

    /* The same rounded term applies to all 16 positions */
    i2_dc_term = ((i4_dc + 32) >> 6);

    /* Column-major walk: all four rows of a column, then the next column */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        for(i4_row = 0; i4_row < SUB_BLK_WIDTH_4x4; i4_row++)
        {
            i2_sample = CLIP_RSD(i2_dc_term + pi2_rsd[i4_col + i4_row * rsd_strd]);
            i4_any_nonzero |= !!i2_sample;
            i2_sample += pu1_pred[i4_col + i4_row * pred_strd];
            pu1_out[i4_col + i4_row * out_strd] = CLIP_U8(i2_sample);
        }
    }
    return i4_any_nonzero;
}
283
284 /*****************************************************************************/
285 /* */
286 /* Function Name : isvcd_iquant_itrans_residual_recon_chroma_4x4 */
287 /* */
288 /* Description : this function computes the recon output from the */
289 /* IQ+IT+RESD */
290 /* */
291 /* Inputs : */
292 /* Globals : none */
293 /* Processing : */
294 /* */
295 /* Outputs : reconstructed samples written to pu1_out */
296 /* Returns : none */
297 /* */
298 /* Issues : none */
299 /* */
300 /* Revision History: */
301 /* */
302 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
303 /* 25 11 2021 Kishore creation */
304 /* */
305 /*****************************************************************************/
306
/**
 * Reconstructs one 4x4 chroma block: inverse quantisation (the DC coefficient
 * is taken from pi2_dc_src without dequantisation), row and column passes of
 * the 4-point inverse transform, addition of residual and prediction, and
 * storage of the clipped result. Horizontally the prediction, residual and
 * output buffers are stepped two elements per sample.
 *
 * @param pi2_src         16 coefficients, four rows of four; pi2_src[0] is not read.
 * @param pu1_pred        Prediction samples; rows are pred_strd apart.
 * @param pi2_rsd         Residual samples; rows are rsd_strd apart.
 * @param pu1_out         Destination samples; rows are out_strd apart.
 * @param pred_strd       Row stride of pu1_pred, in elements.
 * @param rsd_strd        Row stride of pi2_rsd, in elements.
 * @param out_strd        Row stride of pu1_out, in elements.
 * @param pu2_iscal_mat   16 scale entries handed to INV_QUANT.
 * @param pu2_weigh_mat   16 weight entries handed to INV_QUANT.
 * @param u4_qp_div_6     Handed to INV_QUANT; also selects the rounding factor
 *                        (1 << (3 - u4_qp_div_6) when below 4, otherwise 0).
 * @param pi2_tmp         Scratch buffer for the 16 intermediate WORD16 values.
 * @param pi2_dc_src      pi2_dc_src[0] is used as coefficient 0.
 *
 * NOTE(review): the step of two presumably reflects interleaved chroma
 * planes - confirm with the callers.
 */
void isvcd_iquant_itrans_residual_recon_chroma_4x4(
    WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd, UWORD8 *pu1_out, WORD32 pred_strd,
    WORD32 rsd_strd, WORD32 out_strd, const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
    UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
{
    WORD32 i4_row, i4_col, i4_base;
    WORD32 i4_c0, i4_c1, i4_c2, i4_c3;
    WORD16 i2_even_sum, i2_even_diff, i2_odd_diff, i2_odd_sum;
    WORD16 ai2_line[SUB_BLK_WIDTH_4x4];
    WORD16 i2_sample;
    WORD16 i2_rnd = 0;

    if(u4_qp_div_6 < 4)
    {
        i2_rnd = 1 << (3 - u4_qp_div_6);
    }

    /* Inverse quantisation fused with the row pass */
    for(i4_row = 0; i4_row < SUB_BLK_WIDTH_4x4; i4_row++)
    {
        i4_base = i4_row * SUB_BLK_WIDTH_4x4;

        if(i4_row != 0)
        {
            i4_c0 = pi2_src[i4_base];
            INV_QUANT(i4_c0, pu2_iscal_mat[i4_base], pu2_weigh_mat[i4_base], u4_qp_div_6, i2_rnd,
                      4);
        }
        else
        {
            /* First coefficient of the block comes from the separate DC source */
            i4_c0 = pi2_dc_src[0];
        }

        i4_c1 = pi2_src[i4_base + 1];
        INV_QUANT(i4_c1, pu2_iscal_mat[i4_base + 1], pu2_weigh_mat[i4_base + 1], u4_qp_div_6,
                  i2_rnd, 4);

        i4_c2 = pi2_src[i4_base + 2];
        INV_QUANT(i4_c2, pu2_iscal_mat[i4_base + 2], pu2_weigh_mat[i4_base + 2], u4_qp_div_6,
                  i2_rnd, 4);

        i4_c3 = pi2_src[i4_base + 3];
        INV_QUANT(i4_c3, pu2_iscal_mat[i4_base + 3], pu2_weigh_mat[i4_base + 3], u4_qp_div_6,
                  i2_rnd, 4);

        i2_even_sum = i4_c0 + i4_c2;
        i2_even_diff = i4_c0 - i4_c2;
        i2_odd_diff = (i4_c1 >> 1) - i4_c3;
        i2_odd_sum = i4_c1 + (i4_c3 >> 1);

        pi2_tmp[i4_base] = i2_even_sum + i2_odd_sum;
        pi2_tmp[i4_base + 1] = i2_even_diff + i2_odd_diff;
        pi2_tmp[i4_base + 2] = i2_even_diff - i2_odd_diff;
        pi2_tmp[i4_base + 3] = i2_even_sum - i2_odd_sum;
    }

    /* Column pass followed by residual / prediction addition */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        i2_even_sum = (pi2_tmp[i4_col] + pi2_tmp[i4_col + 8]);
        i2_even_diff = (pi2_tmp[i4_col] - pi2_tmp[i4_col + 8]);
        i2_odd_diff = (pi2_tmp[i4_col + 4] >> 1) - pi2_tmp[i4_col + 12];
        i2_odd_sum = pi2_tmp[i4_col + 4] + (pi2_tmp[i4_col + 12] >> 1);

        ai2_line[0] = i2_even_sum + i2_odd_sum;
        ai2_line[1] = i2_even_diff + i2_odd_diff;
        ai2_line[2] = i2_even_diff - i2_odd_diff;
        ai2_line[3] = i2_even_sum - i2_odd_sum;

        for(i4_row = 0; i4_row < SUB_BLK_WIDTH_4x4; i4_row++)
        {
            i2_sample = ((ai2_line[i4_row] + 32) >> 6);
            i2_sample = CLIP_RSD(i2_sample + pi2_rsd[2 * i4_col + i4_row * rsd_strd]);
            i2_sample += pu1_pred[2 * i4_col + i4_row * pred_strd];
            /* Every second element of the output row is written */
            pu1_out[2 * i4_col + i4_row * out_strd] = CLIP_U8(i2_sample);
        }
    }
}
415
416 /*****************************************************************************/
417 /* */
418 /* Function Name : isvcd_iquant_itrans_residual_recon_chroma_4x4_dc */
419 /* */
420 /* Description : this function computes the recon output from the */
421 /* IQ+IT+RESD */
422 /* */
423 /* Inputs : */
424 /* Globals : none */
425 /* Processing : */
426 /* */
427 /* Outputs : reconstructed samples written to pu1_out */
428 /* Returns : none */
429 /* */
430 /* Issues : none */
431 /* */
432 /* Revision History: */
433 /* */
434 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
435 /* 25 11 2021 Kishore creation */
436 /* */
437 /*****************************************************************************/
438
/**
 * DC-only variant of the 4x4 chroma reconstruction: one rounded term derived
 * from pi2_dc_src[0] is added to every residual sample, the sum is clipped,
 * the prediction is added and the clipped result is stored. Horizontally the
 * prediction, residual and output buffers are stepped two elements per sample.
 *
 * @param pi2_src         Unused.
 * @param pu1_pred        Prediction samples; rows are pred_strd apart.
 * @param pi2_rsd         Residual samples; rows are rsd_strd apart.
 * @param pu1_out         Destination samples; rows are out_strd apart.
 * @param pred_strd       Row stride of pu1_pred, in elements.
 * @param rsd_strd        Row stride of pi2_rsd, in elements.
 * @param out_strd        Row stride of pu1_out, in elements.
 * @param pu2_iscal_mat   Unused.
 * @param pu2_weigh_mat   Unused.
 * @param u4_qp_div_6     Unused.
 * @param pi2_tmp         Unused.
 * @param pi2_dc_src      pi2_dc_src[0] is the DC value; no dequantisation is
 *                        applied to it here.
 */
void isvcd_iquant_itrans_residual_recon_chroma_4x4_dc(
    WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd, UWORD8 *pu1_out, WORD32 pred_strd,
    WORD32 rsd_strd, WORD32 out_strd, const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
    UWORD32 u4_qp_div_6, WORD16 *pi2_tmp, WORD16 *pi2_dc_src)
{
    WORD32 i4_col, i4_row;
    WORD32 i4_dc;
    WORD16 i2_dc_term, i2_sample;
    UNUSED(pi2_src);
    UNUSED(pu2_iscal_mat);
    UNUSED(pu2_weigh_mat);
    UNUSED(u4_qp_div_6);
    UNUSED(pi2_tmp);

    i4_dc = pi2_dc_src[0];
    i2_dc_term = ((i4_dc + 32) >> 6);

    /* Column-major walk over the 4x4 block */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_4x4; i4_col++)
    {
        for(i4_row = 0; i4_row < SUB_BLK_WIDTH_4x4; i4_row++)
        {
            i2_sample = CLIP_RSD(i2_dc_term + pi2_rsd[2 * i4_col + i4_row * rsd_strd]);
            i2_sample += pu1_pred[2 * i4_col + i4_row * pred_strd];
            pu1_out[2 * i4_col + i4_row * out_strd] = CLIP_U8(i2_sample);
        }
    }
}
495
496 /*****************************************************************************/
497 /* */
498 /* Function Name : isvcd_iquant_itrans_residual_recon_8x8 */
499 /* */
500 /* Description : this function computes the recon output from the */
501 /* IQ+IT+RESD */
502 /* */
503 /* Inputs : */
504 /* Globals : none */
505 /* Processing : */
506 /* */
507 /* Outputs : reconstructed samples written to pu1_out */
508 /* Returns : i4_nnz (non-zero residual flags, one bit per 4x4 quadrant) */
509 /* */
510 /* Issues : none */
511 /* */
512 /* Revision History: */
513 /* */
514 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
515 /* 25 11 2021 Kishore creation */
516 /* */
517 /*****************************************************************************/
518
/**
 * One 8-point pass of the inverse transform.
 *
 * Reads eight inputs w0..w7 at pi2_in[0], pi2_in[i4_step], ... and writes the
 * eight outputs x0..x7 to pi4_out. The intermediate y and z terms are held in
 * WORD16; the final sums are returned untruncated so that the caller decides
 * whether to narrow them or to round them first.
 */
static void isvcd_itrans_1d_8pt(const WORD16 *pi2_in, WORD32 i4_step, WORD32 *pi4_out)
{
    const WORD16 w0 = pi2_in[0];
    const WORD16 w1 = pi2_in[i4_step];
    const WORD16 w2 = pi2_in[2 * i4_step];
    const WORD16 w3 = pi2_in[3 * i4_step];
    const WORD16 w4 = pi2_in[4 * i4_step];
    const WORD16 w5 = pi2_in[5 * i4_step];
    const WORD16 w6 = pi2_in[6 * i4_step];
    const WORD16 w7 = pi2_in[7 * i4_step];

    /* First stage */
    const WORD16 y0 = w0 + w4;
    const WORD16 y1 = -w3 + w5 - w7 - (w7 >> 1);
    const WORD16 y2 = w0 - w4;
    const WORD16 y3 = w1 + w7 - w3 - (w3 >> 1);
    const WORD16 y4 = (w2 >> 1) - w6;
    const WORD16 y5 = -w1 + w7 + w5 + (w5 >> 1);
    const WORD16 y6 = w2 + (w6 >> 1);
    const WORD16 y7 = w3 + w5 + w1 + (w1 >> 1);

    /* Second stage */
    const WORD16 z0 = y0 + y6;
    const WORD16 z1 = y1 + (y7 >> 2);
    const WORD16 z2 = y2 + y4;
    const WORD16 z3 = y3 + (y5 >> 2);
    const WORD16 z4 = y2 - y4;
    const WORD16 z5 = (y3 >> 2) - y5;
    const WORD16 z6 = y0 - y6;
    const WORD16 z7 = y7 - (y1 >> 2);

    /* Final stage */
    pi4_out[0] = z0 + z7;
    pi4_out[1] = z2 + z5;
    pi4_out[2] = z4 + z3;
    pi4_out[3] = z6 + z1;
    pi4_out[4] = z6 - z1;
    pi4_out[5] = z4 - z3;
    pi4_out[6] = z2 - z5;
    pi4_out[7] = z0 - z7;
}

/**
 * Reconstructs one 8x8 block: inverse quantisation of all 64 coefficients
 * into pi2_tmp, a row pass and a column pass of the 8-point inverse
 * transform, addition of residual and prediction, and storage of the clipped
 * result.
 *
 * @param pi2_src         64 coefficients, eight rows of eight.
 * @param pu1_pred        Prediction samples; rows are pred_strd apart.
 * @param pi2_rsd         Residual samples; rows are rsd_strd apart.
 * @param pu1_out         Destination samples; rows are out_strd apart.
 * @param pred_strd       Row stride of pu1_pred, in elements.
 * @param rsd_strd        Row stride of pi2_rsd, in elements.
 * @param out_strd        Row stride of pu1_out, in elements.
 * @param pu2_iscale_mat  64 scale entries handed to INV_QUANT.
 * @param pu2_weigh_mat   64 weight entries handed to INV_QUANT.
 * @param qp_div          Handed to INV_QUANT; also selects the rounding factor
 *                        (1 << (5 - qp_div) when below 6, otherwise 0).
 * @param pi2_tmp         Scratch buffer for 64 WORD16 values.
 * @param iq_start_idx    Unused.
 * @param pi2_dc_ld_addr  Unused.
 *
 * @return Bit mask of 4x4 quadrants holding a non-zero clipped residual
 *         sample: bit 0 = columns 0-3 / rows 0-3, bit 1 = columns 4-7 /
 *         rows 0-3, bit 4 = columns 0-3 / rows 4-7, bit 5 = columns 4-7 /
 *         rows 4-7.
 */
WORD32 isvcd_iquant_itrans_residual_recon_8x8(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
                                              UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
                                              WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
                                              const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
                                              WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                              WORD16 *pi2_dc_ld_addr)
{
    WORD32 i4_nnz = 0;
    WORD32 i4_idx, i4_row, i4_col, i4_bit;
    WORD32 ai4_line[8];
    WORD32 i4_coef;
    WORD16 *pi2_line;
    WORD16 i2_sample;
    WORD32 i4_rnd = 0;
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    if(qp_div < 6)
    {
        i4_rnd = (1 << (5 - qp_div));
    }

    /* Dequantise every coefficient, index 0 included, into the scratch buffer */
    for(i4_idx = 0; i4_idx < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i4_idx++)
    {
        i4_coef = pi2_src[i4_idx];
        INV_QUANT(i4_coef, pu2_iscale_mat[i4_idx], pu2_weigh_mat[i4_idx], qp_div, i4_rnd, 6);
        pi2_tmp[i4_idx] = i4_coef;
    }

    /* Row pass, in place: each row of the scratch buffer is replaced by its */
    /* transform, narrowed back to WORD16                                    */
    for(i4_row = 0; i4_row < SUB_BLK_WIDTH_8x8; i4_row++)
    {
        pi2_line = pi2_tmp + i4_row * SUB_BLK_WIDTH_8x8;
        isvcd_itrans_1d_8pt(pi2_line, 1, ai4_line);
        for(i4_idx = 0; i4_idx < 8; i4_idx++)
        {
            pi2_line[i4_idx] = ai4_line[i4_idx];
        }
    }

    /* Column pass with rounding ((x + 32) >> 6), then residual and prediction */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_8x8; i4_col++)
    {
        /* Scratch rows are eight elements apart */
        isvcd_itrans_1d_8pt(pi2_tmp + i4_col, 8, ai4_line);

        for(i4_row = 0; i4_row < 8; i4_row++)
        {
            /* Right half adds 1 to the bit index, bottom half adds 4 */
            i4_bit = ((i4_col < 4) ? 0 : 1) + ((i4_row < 4) ? 0 : 4);

            i2_sample = CLIP_RSD(((ai4_line[i4_row] + 32) >> 6) +
                                 pi2_rsd[i4_col + i4_row * rsd_strd]);
            i4_nnz |= (!!i2_sample) << i4_bit;
            i2_sample += pu1_pred[i4_col + i4_row * pred_strd];
            pu1_out[i4_col + i4_row * out_strd] = CLIP_U8(i2_sample);
        }
    }
    return i4_nnz;
}
770
771 /*****************************************************************************/
772 /* */
773 /* Function Name : isvcd_iquant_itrans_residual_recon_8x8_dc */
774 /* */
775 /* Description : this function computes the recon output from the */
776 /* IQ+IT+RESD */
777 /* */
778 /* Inputs : */
779 /* Globals : none */
780 /* Processing : */
781 /* */
782 /* Outputs : reconstructed samples written to pu1_out */
783 /* Returns : i4_nnz (non-zero residual flags, one bit per 4x4 quadrant) */
784 /* */
785 /* Issues : none */
786 /* */
787 /* Revision History: */
788 /* */
789 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
790 /* 25 11 2021 Kishore creation */
791 /* */
792 /*****************************************************************************/
793
/**
 * DC-only variant of the 8x8 reconstruction: pi2_src[0] is dequantised and
 * rounded once, that term is added to every residual sample of the block,
 * the sum is clipped, the prediction is added and the clipped result is
 * stored.
 *
 * @param pi2_src         Coefficients; only pi2_src[0] is read.
 * @param pu1_pred        Prediction samples; rows are pred_strd apart.
 * @param pi2_rsd         Residual samples; rows are rsd_strd apart.
 * @param pu1_out         Destination samples; rows are out_strd apart.
 * @param pred_strd       Row stride of pu1_pred, in elements.
 * @param rsd_strd        Row stride of pi2_rsd, in elements.
 * @param out_strd        Row stride of pu1_out, in elements.
 * @param pu2_iscale_mat  Only entry 0 is handed to INV_QUANT.
 * @param pu2_weigh_mat   Only entry 0 is handed to INV_QUANT.
 * @param qp_div          Handed to INV_QUANT; also selects the rounding factor
 *                        (1 << (5 - qp_div) when below 6, otherwise 0).
 * @param pi2_tmp         Unused.
 * @param iq_start_idx    Unused.
 * @param pi2_dc_ld_addr  Unused.
 *
 * @return Bit mask of 4x4 quadrants holding a non-zero clipped residual
 *         sample: bit 0 = columns 0-3 / rows 0-3, bit 1 = columns 4-7 /
 *         rows 0-3, bit 4 = columns 0-3 / rows 4-7, bit 5 = columns 4-7 /
 *         rows 4-7.
 */
WORD32 isvcd_iquant_itrans_residual_recon_8x8_dc(WORD16 *pi2_src, UWORD8 *pu1_pred, WORD16 *pi2_rsd,
                                                 UWORD8 *pu1_out, WORD32 pred_strd, WORD32 rsd_strd,
                                                 WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
                                                 const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
                                                 WORD16 *pi2_tmp, WORD32 iq_start_idx,
                                                 WORD16 *pi2_dc_ld_addr)
{
    WORD32 i4_nnz = 0;
    WORD32 i4_row, i4_col, i4_bit;
    WORD32 i4_dc;
    WORD16 i2_dc_term, i2_sample;
    WORD32 i4_rnd = 0;
    UNUSED(pi2_tmp);
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    if(qp_div < 6)
    {
        i4_rnd = (1 << (5 - qp_div));
    }

    /* Dequantise the single coefficient and round it once for the whole block */
    i4_dc = pi2_src[0];
    INV_QUANT(i4_dc, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, i4_rnd, 6);
    i2_dc_term = (i4_dc + 32) >> 6;

    /* Column-major walk: all eight rows of a column, then the next column */
    for(i4_col = 0; i4_col < SUB_BLK_WIDTH_8x8; i4_col++)
    {
        for(i4_row = 0; i4_row < 8; i4_row++)
        {
            /* Right half adds 1 to the bit index, bottom half adds 4 */
            i4_bit = ((i4_col < 4) ? 0 : 1) + ((i4_row < 4) ? 0 : 4);

            i2_sample = CLIP_RSD(i2_dc_term + pi2_rsd[i4_col + i4_row * rsd_strd]);
            i4_nnz |= (!!i2_sample) << i4_bit;
            i2_sample += pu1_pred[i4_col + i4_row * pred_strd];
            pu1_out[i4_col + i4_row * out_strd] = CLIP_U8(i2_sample);
        }
    }
    return i4_nnz;
}
909