1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264_iquant_itrans_recon.c
25 *
26 * @brief
27 * Contains definition of functions for h264 inverse quantization,
28 * inverse transformation and recon
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - ih264_iquant_itrans_recon_4x4
35 * - ih264_iquant_itrans_recon_8x8
36 * - ih264_iquant_itrans_recon_4x4_dc
37 * - ih264_iquant_itrans_recon_8x8_dc
38 * - ih264_iquant_itrans_recon_chroma_4x4
39 * - ih264_iquant_itrans_recon_chroma_4x4_dc
40 *
41 * @remarks
42 *
43 *******************************************************************************
44 */
45
46 /*****************************************************************************/
47 /* File Includes */
48 /*****************************************************************************/
49
50 /* User Include Files */
51 #include "ih264_typedefs.h"
52 #include "ih264_defs.h"
53 #include "ih264_macros.h"
54 #include "ih264_size_defs.h"
55 #include "ih264_trans_macros.h"
56 #include "ih264_trans_data.h"
57 #include "ih264_structs.h"
58 #include "ih264_trans_quant_itrans_iquant.h"
59 #include "ih264_platform_macros.h"
60
61 /*****************************************************************************/
62 /* Function definitions */
63 /*****************************************************************************/
64
65 /**
66 ********************************************************************************
67 *
68 * @brief This function reconstructs a 4x4 sub block from quantized residue and
69 * prediction buffer
70 *
71 * @par Description:
72 * The quantized residue is first inverse quantized, then inverse transformed.
73 * This inverse transformed content is added to the prediction buffer to recon-
74 * struct the end output
75 *
76 * @param[in] pi2_src
77 * quantized 4x4 block
78 *
79 * @param[in] pu1_pred
80 * prediction 4x4 block
81 *
82 * @param[out] pu1_out
83 * reconstructed 4x4 block
84 *
85 * @param[in] pred_strd
86 * Prediction buffer stride
87 *
88 * @param[in] out_strd
89 * recon buffer Stride
90 *
91 * @param[in] pu2_iscal_mat
92 * pointer to inverse scaling matrix
93 *
94 * @param[in] pu2_weigh_mat
95 * pointer to weight matrix
96 *
97 * @param[in] u4_qp_div_6
98 * Floor (qp/6)
99 *
100 * @param[in] pi2_tmp
101 * temporary buffer of size 1*16
102 *
103 * @param[in] iq_start_idx
104 * Differentiates b/w intra or inter
105 *
106 * @param[in] pi2_dc_ld_addr
107 * Address to load DC value of the 4x4 blk
108 *
109 * @returns none
110 *
111 * @remarks none
112 *
113 *******************************************************************************
114 */
ih264_iquant_itrans_recon_4x4(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)115 void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
116 UWORD8 *pu1_pred,
117 UWORD8 *pu1_out,
118 WORD32 pred_strd,
119 WORD32 out_strd,
120 const UWORD16 *pu2_iscal_mat,
121 const UWORD16 *pu2_weigh_mat,
122 UWORD32 u4_qp_div_6,
123 WORD16 *pi2_tmp,
124 WORD32 iq_start_idx,
125 WORD16 *pi2_dc_ld_addr)
126 {
127 WORD16 *pi2_src_ptr = pi2_src;
128 WORD16 *pi2_tmp_ptr = pi2_tmp;
129 UWORD8 *pu1_pred_ptr = pu1_pred;
130 UWORD8 *pu1_out_ptr = pu1_out;
131 WORD16 x0, x1, x2, x3, i;
132 WORD32 q0, q1, q2, q3;
133 WORD16 i_macro;
134 WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
135
136 /* inverse quant */
137 /* horizontal inverse transform */
138 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
139 {
140 q0 = pi2_src_ptr[0];
141 INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
142 /* Restoring dc value for intra case */
143 if (i==0 && iq_start_idx == 1)
144 {
145 q0 = pi2_dc_ld_addr[0];
146 }
147
148 q2 = pi2_src_ptr[2];
149 INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
150
151 x0 = q0 + q2;
152 x1 = q0 - q2;
153
154 q1 = pi2_src_ptr[1];
155 INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
156
157 q3 = pi2_src_ptr[3];
158 INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
159
160 x2 = (q1 >> 1) - q3;
161 x3 = q1 + (q3 >> 1);
162
163 pi2_tmp_ptr[0] = x0 + x3;
164 pi2_tmp_ptr[1] = x1 + x2;
165 pi2_tmp_ptr[2] = x1 - x2;
166 pi2_tmp_ptr[3] = x0 - x3;
167
168 pi2_src_ptr += SUB_BLK_WIDTH_4x4;
169 pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
170 pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
171 pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
172 }
173
174 /* vertical inverse transform */
175 pi2_tmp_ptr = pi2_tmp;
176 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
177 {
178 pu1_pred_ptr = pu1_pred;
179 pu1_out = pu1_out_ptr;
180
181 x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
182 x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
183 x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
184 x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
185
186 /* inverse prediction */
187 i_macro = x0 + x3;
188 i_macro = ((i_macro + 32) >> 6);
189 i_macro += *pu1_pred_ptr;
190 *pu1_out = CLIP_U8(i_macro);
191 pu1_pred_ptr += pred_strd;
192 pu1_out += out_strd;
193
194 i_macro = x1 + x2;
195 i_macro = ((i_macro + 32) >> 6);
196 i_macro += *pu1_pred_ptr;
197 *pu1_out = CLIP_U8(i_macro);
198 pu1_pred_ptr += pred_strd;
199 pu1_out += out_strd;
200
201 i_macro = x1 - x2;
202 i_macro = ((i_macro + 32) >> 6);
203 i_macro += *pu1_pred_ptr;
204 *pu1_out = CLIP_U8(i_macro);
205 pu1_pred_ptr += pred_strd;
206 pu1_out += out_strd;
207
208 i_macro = x0 - x3;
209 i_macro = ((i_macro + 32) >> 6);
210 i_macro += *pu1_pred_ptr;
211 *pu1_out = CLIP_U8(i_macro);
212
213 pi2_tmp_ptr++;
214 pu1_out_ptr++;
215 pu1_pred++;
216 }
217 }
218
219 /**
220 ********************************************************************************
221 *
222 * @brief This function reconstructs a 4x4 sub block from quantized residue and
223 * prediction buffer, if only dc value is present for residue
224 *
225 * @par Description:
226 * The quantized residue is first inverse quantized, then inverse transformed.
227 * This inverse transformed content is added to the prediction buffer to recon-
228 * struct the end output
229 *
230 * @param[in] pi2_src
231 * quantized 4x4 block
232 *
233 * @param[in] pu1_pred
234 * prediction 4x4 block
235 *
236 * @param[out] pu1_out
237 * reconstructed 4x4 block
238 *
239 * @param[in] pred_strd
240 * Prediction buffer stride
241 *
242 * @param[in] out_strd
243 * recon buffer Stride
244 *
245 * @param[in] pu2_iscal_mat
246 * pointer to inverse scaling matrix
247 *
248 * @param[in] pu2_weigh_mat
249 * pointer to weight matrix
250 *
251 * @param[in] u4_qp_div_6
252 * Floor (qp/6)
253 *
254 * @param[in] pi2_tmp
255 * temporary buffer of size 1*16
256 *
257 * @param[in] iq_start_idx
258 * Differentiates b/w intra or inter
259 *
260 * @param[in] pi2_dc_ld_addr
261 * Address to load DC value of the 4x4 blk
262 *
263 * @returns none
264 *
265 * @remarks none
266 *
267 *******************************************************************************
268 */
ih264_iquant_itrans_recon_4x4_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)269 void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
270 UWORD8 *pu1_pred,
271 UWORD8 *pu1_out,
272 WORD32 pred_strd,
273 WORD32 out_strd,
274 const UWORD16 *pu2_iscal_mat,
275 const UWORD16 *pu2_weigh_mat,
276 UWORD32 u4_qp_div_6,
277 WORD16 *pi2_tmp,
278 WORD32 iq_start_idx,
279 WORD16 *pi2_dc_ld_addr)
280 {
281 UWORD8 *pu1_pred_ptr = pu1_pred;
282 UWORD8 *pu1_out_ptr = pu1_out;
283 WORD32 q0;
284 WORD16 x, i_macro, i;
285 WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
286
287 UNUSED(pi2_tmp);
288 if(iq_start_idx == 0)
289 {
290 q0 = pi2_src[0];
291 INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
292 }
293 else
294 {
295 q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case3
296 }
297 i_macro = ((q0 + 32) >> 6);
298 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
299 {
300 pu1_pred_ptr = pu1_pred;
301 pu1_out = pu1_out_ptr;
302
303 /* inverse prediction */
304 x = i_macro + *pu1_pred_ptr;
305 *pu1_out = CLIP_U8(x);
306 pu1_pred_ptr += pred_strd;
307 pu1_out += out_strd;
308
309 x = i_macro + *pu1_pred_ptr;
310 *pu1_out = CLIP_U8(x);
311 pu1_pred_ptr += pred_strd;
312 pu1_out += out_strd;
313
314 x = i_macro + *pu1_pred_ptr;
315 *pu1_out = CLIP_U8(x);
316 pu1_pred_ptr += pred_strd;
317 pu1_out += out_strd;
318
319 x = i_macro + *pu1_pred_ptr;
320 *pu1_out = CLIP_U8(x);
321
322 pu1_out_ptr++;
323 pu1_pred++;
324 }
325 }
326
327 /**
328 ********************************************************************************
329 *
330 * @brief This function reconstructs a 8x8 sub block from quantized residue and
331 * prediction buffer
332 *
333 * @par Description:
334 * The quantized residue is first inverse quantized, then inverse transformed.
335 * This inverse transformed content is added to the prediction buffer to recon-
336 * struct the end output
337 *
338 * @param[in] pi2_src
339 * quantized 4x4 block
340 *
341 * @param[in] pu1_pred
342 * prediction 4x4 block
343 *
344 * @param[out] pu1_out
345 * reconstructed 4x4 block
346 *
347 * @param[in] pred_strd
348 * Prediction buffer stride
349 *
350 * @param[in] out_strd
351 * recon buffer Stride
352 *
353 * @param[in] pu2_iscal_mat
354 * pointer to inverse scaling matrix
355 *
356 * @param[in] pu2_weigh_mat
357 * pointer to weight matrix
358 *
359 * @param[in] u4_qp_div_6
360 * Floor (qp/6)
361 *
362 * @param[in] pi2_tmp
363 * temporary buffer of size 1*16. we dont need a bigger block since we reuse
364 * the tmp for each block
365 *
366 * @param[in] iq_start_idx
367 * UNUSED
368 *
369 * @param[in] pi2_dc_ld_addr
370 * UNUSED
371 *
372 * @returns none
373 *
374 * @remarks none
375 *
376 *******************************************************************************
377 */
ih264_iquant_itrans_recon_8x8(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)378 void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
379 UWORD8 *pu1_pred,
380 UWORD8 *pu1_out,
381 WORD32 pred_strd,
382 WORD32 out_strd,
383 const UWORD16 *pu2_iscale_mat,
384 const UWORD16 *pu2_weigh_mat,
385 UWORD32 qp_div,
386 WORD16 *pi2_tmp,
387 WORD32 iq_start_idx,
388 WORD16 *pi2_dc_ld_addr)
389 {
390 WORD32 i;
391 WORD16 *pi2_tmp_ptr = pi2_tmp;
392 UWORD8 *pu1_pred_ptr = pu1_pred;
393 UWORD8 *pu1_out_ptr = pu1_out;
394 WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
395 WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
396 WORD16 i_macro;
397 WORD32 q;
398 WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
399
400 UNUSED(iq_start_idx);
401 UNUSED(pi2_dc_ld_addr);
402 /*************************************************************/
403 /* De quantization of coefficients. Will be replaced by SIMD */
404 /* operations on platform. Note : DC coeff is not scaled */
405 /*************************************************************/
406 for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
407 {
408 q = pi2_src[i];
409 INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
410 pi2_tmp_ptr[i] = q;
411 }
412 /* Perform Inverse transform */
413 /*--------------------------------------------------------------------*/
414 /* IDCT [ Horizontal transformation ] */
415 /*--------------------------------------------------------------------*/
416 for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
417 {
418 /*------------------------------------------------------------------*/
419 /* y0 = w0 + w4 */
420 /* y1 = -w3 + w5 - w7 - (w7 >> 1) */
421 /* y2 = w0 - w4 */
422 /* y3 = w1 + w7 - w3 - (w3 >> 1) */
423 /* y4 = (w2 >> 1) - w6 */
424 /* y5 = -w1 + w7 + w5 + (w5 >> 1) */
425 /* y6 = w2 + (w6 >> 1) */
426 /* y7 = w3 + w5 + w1 + (w1 >> 1) */
427 /*------------------------------------------------------------------*/
428 i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
429
430 i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
431 - (pi2_tmp_ptr[7] >> 1));
432
433 i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
434
435 i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
436 - (pi2_tmp_ptr[3] >> 1));
437
438 i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
439
440 i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
441 + (pi2_tmp_ptr[5] >> 1));
442
443 i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
444
445 i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
446 + (pi2_tmp_ptr[1] >> 1));
447
448 /*------------------------------------------------------------------*/
449 /* z0 = y0 + y6 */
450 /* z1 = y1 + (y7 >> 2) */
451 /* z2 = y2 + y4 */
452 /* z3 = y3 + (y5 >> 2) */
453 /* z4 = y2 - y4 */
454 /* z5 = (y3 >> 2) - y5 */
455 /* z6 = y0 - y6 */
456 /* z7 = y7 - (y1 >> 2) */
457 /*------------------------------------------------------------------*/
458 i_z0 = i_y0 + i_y6;
459 i_z1 = i_y1 + (i_y7 >> 2);
460 i_z2 = i_y2 + i_y4;
461 i_z3 = i_y3 + (i_y5 >> 2);
462 i_z4 = i_y2 - i_y4;
463 i_z5 = (i_y3 >> 2) - i_y5;
464 i_z6 = i_y0 - i_y6;
465 i_z7 = i_y7 - (i_y1 >> 2);
466
467 /*------------------------------------------------------------------*/
468 /* x0 = z0 + z7 */
469 /* x1 = z2 + z5 */
470 /* x2 = z4 + z3 */
471 /* x3 = z6 + z1 */
472 /* x4 = z6 - z1 */
473 /* x5 = z4 - z3 */
474 /* x6 = z2 - z5 */
475 /* x7 = z0 - z7 */
476 /*------------------------------------------------------------------*/
477 pi2_tmp_ptr[0] = i_z0 + i_z7;
478 pi2_tmp_ptr[1] = i_z2 + i_z5;
479 pi2_tmp_ptr[2] = i_z4 + i_z3;
480 pi2_tmp_ptr[3] = i_z6 + i_z1;
481 pi2_tmp_ptr[4] = i_z6 - i_z1;
482 pi2_tmp_ptr[5] = i_z4 - i_z3;
483 pi2_tmp_ptr[6] = i_z2 - i_z5;
484 pi2_tmp_ptr[7] = i_z0 - i_z7;
485
486 /* move to the next row */
487 //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
488 pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
489 }
490
491 /*--------------------------------------------------------------------*/
492 /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
493 /* */
494 /* Add the prediction and store it back to reconstructed frame buffer */
495 /* [Prediction buffer itself in this case] */
496 /*--------------------------------------------------------------------*/
497 pi2_tmp_ptr = pi2_tmp;
498 for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
499 {
500 pu1_pred_ptr = pu1_pred;
501 pu1_out = pu1_out_ptr;
502 /*------------------------------------------------------------------*/
503 /* y0j = w0j + w4j */
504 /* y1j = -w3j + w5j -w7j -(w7j >> 1) */
505 /* y2j = w0j -w4j */
506 /* y3j = w1j + w7j -w3j -(w3j >> 1) */
507 /* y4j = ( w2j >> 1 ) -w6j */
508 /* y5j = -w1j + w7j + w5j + (w5j >> 1) */
509 /* y6j = w2j + ( w6j >> 1 ) */
510 /* y7j = w3j + w5j + w1j + (w1j >> 1) */
511 /*------------------------------------------------------------------*/
512 i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
513
514 i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
515 - (pi2_tmp_ptr[56] >> 1);
516
517 i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
518
519 i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
520 - (pi2_tmp_ptr[24] >> 1);
521
522 i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
523
524 i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
525 + (pi2_tmp_ptr[40] >> 1);
526
527 i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
528
529 i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
530 + (pi2_tmp_ptr[8] >> 1);
531
532 /*------------------------------------------------------------------*/
533 /* z0j = y0j + y6j */
534 /* z1j = y1j + (y7j >> 2) */
535 /* z2j = y2j + y4j */
536 /* z3j = y3j + (y5j >> 2) */
537 /* z4j = y2j -y4j */
538 /* z5j = (y3j >> 2) -y5j */
539 /* z6j = y0j -y6j */
540 /* z7j = y7j -(y1j >> 2) */
541 /*------------------------------------------------------------------*/
542 i_z0 = i_y0 + i_y6;
543 i_z1 = i_y1 + (i_y7 >> 2);
544 i_z2 = i_y2 + i_y4;
545 i_z3 = i_y3 + (i_y5 >> 2);
546 i_z4 = i_y2 - i_y4;
547 i_z5 = (i_y3 >> 2) - i_y5;
548 i_z6 = i_y0 - i_y6;
549 i_z7 = i_y7 - (i_y1 >> 2);
550
551 /*------------------------------------------------------------------*/
552 /* x0j = z0j + z7j */
553 /* x1j = z2j + z5j */
554 /* x2j = z4j + z3j */
555 /* x3j = z6j + z1j */
556 /* x4j = z6j -z1j */
557 /* x5j = z4j -z3j */
558 /* x6j = z2j -z5j */
559 /* x7j = z0j -z7j */
560 /*------------------------------------------------------------------*/
561 i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
562 *pu1_out = CLIP_U8(i_macro);
563 /* Change uc_recBuffer to Point to next element in the same column*/
564 pu1_pred_ptr += pred_strd;
565 pu1_out += out_strd;
566
567 i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
568 *pu1_out = CLIP_U8(i_macro);
569 pu1_pred_ptr += pred_strd;
570 pu1_out += out_strd;
571
572 i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
573 *pu1_out = CLIP_U8(i_macro);
574 pu1_pred_ptr += pred_strd;
575 pu1_out += out_strd;
576
577 i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
578 *pu1_out = CLIP_U8(i_macro);
579 pu1_pred_ptr += pred_strd;
580 pu1_out += out_strd;
581
582 i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
583 *pu1_out = CLIP_U8(i_macro);
584 pu1_pred_ptr += pred_strd;
585 pu1_out += out_strd;
586
587 i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
588 *pu1_out = CLIP_U8(i_macro);
589 pu1_pred_ptr += pred_strd;
590 pu1_out += out_strd;
591
592 i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
593 *pu1_out = CLIP_U8(i_macro);
594 pu1_pred_ptr += pred_strd;
595 pu1_out += out_strd;
596
597 i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
598 *pu1_out = CLIP_U8(i_macro);
599
600 pi2_tmp_ptr++;
601 pu1_out_ptr++;
602 pu1_pred++;
603 }
604 }
605
606 /**
607 ********************************************************************************
608 *
609 * @brief This function reconstructs a 8x8 sub block from quantized residue and
610 * prediction buffer, if only dc value is present
611 *
612 * @par Description:
613 * The quantized residue is first inverse quantized, then inverse transformed.
614 * This inverse transformed content is added to the prediction buffer to recon-
615 * struct the end output
616 *
617 * @param[in] pi2_src
618 * quantized 4x4 block
619 *
620 * @param[in] pu1_pred
621 * prediction 4x4 block
622 *
623 * @param[out] pu1_out
624 * reconstructed 4x4 block
625 *
626 * @param[in] pred_strd
627 * Prediction buffer stride
628 *
629 * @param[in] out_strd
630 * recon buffer Stride
631 *
632 * @param[in] pu2_iscal_mat
633 * pointer to inverse scaling matrix
634 *
635 * @param[in] pu2_weigh_mat
636 * pointer to weight matrix
637 *
638 * @param[in] u4_qp_div_6
639 * Floor (qp/6)
640 *
641 * @param[in] pi2_tmp
642 * temporary buffer of size 1*16. we dont need a bigger block since we reuse
643 * the tmp for each block
644 *
645 * @param[in] iq_start_idx
646 * UNUSED
647 *
648 * @param[in] pi2_dc_ld_addr
649 * UNUSED
650 *
651 * @returns none
652 *
653 * @remarks none
654 *
655 *******************************************************************************
656 */
ih264_iquant_itrans_recon_8x8_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)657 void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
658 UWORD8 *pu1_pred,
659 UWORD8 *pu1_out,
660 WORD32 pred_strd,
661 WORD32 out_strd,
662 const UWORD16 *pu2_iscale_mat,
663 const UWORD16 *pu2_weigh_mat,
664 UWORD32 qp_div,
665 WORD16 *pi2_tmp,
666 WORD32 iq_start_idx,
667 WORD16 *pi2_dc_ld_addr)
668 {
669 UWORD8 *pu1_pred_ptr = pu1_pred;
670 UWORD8 *pu1_out_ptr = pu1_out;
671 WORD16 x, i, i_macro;
672 WORD32 q;
673 WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
674
675 UNUSED(pi2_tmp);
676 UNUSED(iq_start_idx);
677 UNUSED(pi2_dc_ld_addr);
678 /*************************************************************/
679 /* Dequantization of coefficients. Will be replaced by SIMD */
680 /* operations on platform. Note : DC coeff is not scaled */
681 /*************************************************************/
682 q = pi2_src[0];
683 INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
684 i_macro = (q + 32) >> 6;
685 /* Perform Inverse transform */
686 /*--------------------------------------------------------------------*/
687 /* IDCT [ Horizontal transformation ] */
688 /*--------------------------------------------------------------------*/
689 /*--------------------------------------------------------------------*/
690 /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
691 /* */
692 /* Add the prediction and store it back to reconstructed frame buffer */
693 /* [Prediction buffer itself in this case] */
694 /*--------------------------------------------------------------------*/
695 for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
696 {
697 pu1_pred_ptr = pu1_pred;
698 pu1_out = pu1_out_ptr;
699
700 x = i_macro + *pu1_pred_ptr;
701 *pu1_out = CLIP_U8(x);
702 /* Change uc_recBuffer to Point to next element in the same column*/
703 pu1_pred_ptr += pred_strd;
704 pu1_out += out_strd;
705
706 x = i_macro + *pu1_pred_ptr;
707 *pu1_out = CLIP_U8(x);
708 pu1_pred_ptr += pred_strd;
709 pu1_out += out_strd;
710
711 x = i_macro + *pu1_pred_ptr;
712 *pu1_out = CLIP_U8(x);
713 pu1_pred_ptr += pred_strd;
714 pu1_out += out_strd;
715
716 x = i_macro + *pu1_pred_ptr;
717 *pu1_out = CLIP_U8(x);
718 pu1_pred_ptr += pred_strd;
719 pu1_out += out_strd;
720
721 x = i_macro + *pu1_pred_ptr;
722 *pu1_out = CLIP_U8(x);
723 pu1_pred_ptr += pred_strd;
724 pu1_out += out_strd;
725
726 x = i_macro + *pu1_pred_ptr;
727 *pu1_out = CLIP_U8(x);
728 pu1_pred_ptr += pred_strd;
729 pu1_out += out_strd;
730
731 x = i_macro + *pu1_pred_ptr;
732 *pu1_out = CLIP_U8(x);
733 pu1_pred_ptr += pred_strd;
734 pu1_out += out_strd;
735
736 x = i_macro + *pu1_pred_ptr;
737 *pu1_out = CLIP_U8(x);
738
739 pu1_out_ptr++;
740 pu1_pred++;
741 }
742 }
743
744 /**
745 ********************************************************************************
746 *
747 * @brief This function reconstructs a 4x4 sub block from quantized residue and
748 * prediction buffer
749 *
750 * @par Description:
751 * The quantized residue is first inverse quantized, then inverse transformed.
752 * This inverse transformed content is added to the prediction buffer to recon-
753 * struct the end output
754 *
755 * @param[in] pi2_src
756 * quantized 4x4 block
757 *
758 * @param[in] pu1_pred
759 * prediction 4x4 block
760 *
761 * @param[out] pu1_out
762 * reconstructed 4x4 block
763 *
764 * @param[in] pred_strd
765 * Prediction buffer stride
766 *
767 * @param[in] out_strd
768 * recon buffer Stride
769 *
770 * @param[in] pu2_iscal_mat
771 * pointer to inverse scaling matrix
772 *
773 * @param[in] pu2_weigh_mat
774 * pointer to weight matrix
775 *
776 * @param[in] u4_qp_div_6
777 * Floor (qp/6)
778 *
779 * @param[in] pi2_tmp
780 * temporary buffer of size 1*16
781 *
782 * @param[in] pi2_dc_src
783 * Address to load DC value of the 4x4 blk
784 *
785 * @returns none
786 *
787 * @remarks none
788 *
789 *******************************************************************************
790 */
ih264_iquant_itrans_recon_chroma_4x4(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)791 void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
792 UWORD8 *pu1_pred,
793 UWORD8 *pu1_out,
794 WORD32 pred_strd,
795 WORD32 out_strd,
796 const UWORD16 *pu2_iscal_mat,
797 const UWORD16 *pu2_weigh_mat,
798 UWORD32 u4_qp_div_6,
799 WORD16 *pi2_tmp,
800 WORD16 *pi2_dc_src)
801 {
802 WORD16 *pi2_src_ptr = pi2_src;
803 WORD16 *pi2_tmp_ptr = pi2_tmp;
804 UWORD8 *pu1_pred_ptr = pu1_pred;
805 UWORD8 *pu1_out_ptr = pu1_out;
806 WORD16 x0, x1, x2, x3, i;
807 WORD32 q0, q1, q2, q3;
808 WORD16 i_macro;
809 WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
810
811 /* inverse quant */
812 /* horizontal inverse transform */
813 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
814 {
815 if(i == 0)
816 {
817 q0 = pi2_dc_src[0];
818 }
819 else
820 {
821 q0 = pi2_src_ptr[0];
822 INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
823 }
824
825 q2 = pi2_src_ptr[2];
826 INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
827
828 x0 = q0 + q2;
829 x1 = q0 - q2;
830
831 q1 = pi2_src_ptr[1];
832 INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
833
834 q3 = pi2_src_ptr[3];
835 INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
836
837 x2 = (q1 >> 1) - q3;
838 x3 = q1 + (q3 >> 1);
839
840 pi2_tmp_ptr[0] = x0 + x3;
841 pi2_tmp_ptr[1] = x1 + x2;
842 pi2_tmp_ptr[2] = x1 - x2;
843 pi2_tmp_ptr[3] = x0 - x3;
844
845 pi2_src_ptr += SUB_BLK_WIDTH_4x4;
846 pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
847 pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
848 pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
849 }
850
851 /* vertical inverse transform */
852 pi2_tmp_ptr = pi2_tmp;
853 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
854 {
855 pu1_pred_ptr = pu1_pred;
856 pu1_out = pu1_out_ptr;
857
858 x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
859 x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
860 x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
861 x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
862
863 /* inverse prediction */
864 i_macro = x0 + x3;
865 i_macro = ((i_macro + 32) >> 6);
866 i_macro += *pu1_pred_ptr;
867 *pu1_out = CLIP_U8(i_macro);
868 pu1_pred_ptr += pred_strd;
869 pu1_out += out_strd;
870
871 i_macro = x1 + x2;
872 i_macro = ((i_macro + 32) >> 6);
873 i_macro += *pu1_pred_ptr;
874 *pu1_out = CLIP_U8(i_macro);
875 pu1_pred_ptr += pred_strd;
876 pu1_out += out_strd;
877
878 i_macro = x1 - x2;
879 i_macro = ((i_macro + 32) >> 6);
880 i_macro += *pu1_pred_ptr;
881 *pu1_out = CLIP_U8(i_macro);
882 pu1_pred_ptr += pred_strd;
883 pu1_out += out_strd;
884
885 i_macro = x0 - x3;
886 i_macro = ((i_macro + 32) >> 6);
887 i_macro += *pu1_pred_ptr;
888 *pu1_out = CLIP_U8(i_macro);
889
890 pi2_tmp_ptr++;
891 pu1_out_ptr += 2; // Interleaved store for output
892 pu1_pred += 2; // Interleaved load for pred buffer
893 }
894 }
895
896 /**
897 ********************************************************************************
898 *
899 * @brief This function reconstructs a 4x4 sub block from quantized residue and
900 * prediction buffer if only dc value is present for residue
901 *
902 * @par Description:
903 * The quantized residue is first inverse quantized,
904 * This inverse quantized content is added to the prediction buffer to recon-
905 * struct the end output
906 *
907 * @param[in] pi2_src
908 * quantized dc coefficient
909 *
910 * @param[in] pu1_pred
911 * prediction 4x4 block in interleaved format
912 *
913 * @param[in] pred_strd,
914 * Prediction buffer stride in interleaved format
915 *
916 * @param[in] out_strd
917 * recon buffer Stride
918 *
919 * @param[in] pu2_iscal_mat
920 * pointer to inverse scaling matrix
921 *
922 * @param[in] pu2_weigh_mat
923 * pointer to weight matrix
924 *
925 * @param[in] u4_qp_div_6
926 * Floor (qp/6)
927 *
928 * @param[in] pi2_tmp
929 * temporary buffer of size 1*16
930 *
931 * @param[in] pi2_dc_src
932 * Address to load DC value of the 4x4 blk
933 *
934 * @returns none
935 *
936 * @remarks none
937 *
938 *******************************************************************************
939 */
ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)940 void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
941 UWORD8 *pu1_pred,
942 UWORD8 *pu1_out,
943 WORD32 pred_strd,
944 WORD32 out_strd,
945 const UWORD16 *pu2_iscal_mat,
946 const UWORD16 *pu2_weigh_mat,
947 UWORD32 u4_qp_div_6,
948 WORD16 *pi2_tmp,
949 WORD16 *pi2_dc_src)
950 {
951 UWORD8 *pu1_pred_ptr = pu1_pred;
952 UWORD8 *pu1_out_ptr = pu1_out;
953 WORD32 q0;
954 WORD16 x, i_macro, i;
955
956 UNUSED(pi2_src);
957 UNUSED(pu2_iscal_mat);
958 UNUSED(pu2_weigh_mat);
959 UNUSED(u4_qp_div_6);
960 UNUSED(pi2_tmp);
961
962 q0 = pi2_dc_src[0]; // Restoring dc value for intra case3
963 i_macro = ((q0 + 32) >> 6);
964
965 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
966 {
967 pu1_pred_ptr = pu1_pred;
968 pu1_out = pu1_out_ptr;
969
970 /* inverse prediction */
971 x = i_macro + *pu1_pred_ptr;
972 *pu1_out = CLIP_U8(x);
973 pu1_pred_ptr += pred_strd;
974 pu1_out += out_strd;
975
976 x = i_macro + *pu1_pred_ptr;
977 *pu1_out = CLIP_U8(x);
978 pu1_pred_ptr += pred_strd;
979 pu1_out += out_strd;
980
981 x = i_macro + *pu1_pred_ptr;
982 *pu1_out = CLIP_U8(x);
983 pu1_pred_ptr += pred_strd;
984 pu1_out += out_strd;
985
986 x = i_macro + *pu1_pred_ptr;
987 *pu1_out = CLIP_U8(x);
988
989 pu1_out_ptr+=2;
990 pu1_pred+=2;
991 }
992 }
993