1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevcd_iquant_itrans_recon_ctb.c
22 *
23 * @brief
24 * Contains functions for inverse quantization, inverse transform and recon
25 *
26 * @author
27 * Ittiam
28 *
29 * @par List of Functions:
30 * - ihevcd_iquant_itrans_recon_ctb()
31 *
32 * @remarks
33 * None
34 *
35 *******************************************************************************
36 */
37 /*****************************************************************************/
38 /* File Includes */
39 /*****************************************************************************/
40 #include <stdio.h>
41 #include <stddef.h>
42 #include <stdlib.h>
43 #include <string.h>
44
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_cabac_tables.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56
57 #include "ihevcd_defs.h"
58 #include "ihevcd_function_selector.h"
59 #include "ihevcd_structs.h"
60 #include "ihevcd_error.h"
61 #include "ihevcd_bitstream.h"
62 #include "ihevc_common_tables.h"
63
64 /* Intra pred includes */
65 #include "ihevc_intra_pred.h"
66
67 /* Inverse transform common module includes */
68 #include "ihevc_trans_tables.h"
69 #include "ihevc_trans_macros.h"
70 #include "ihevc_itrans_recon.h"
71 #include "ihevc_recon.h"
72 #include "ihevc_chroma_itrans_recon.h"
73 #include "ihevc_chroma_recon.h"
74
75 /* Decoder includes */
76 #include "ihevcd_common_tables.h"
77 #include "ihevcd_iquant_itrans_recon_ctb.h"
78 #include "ihevcd_debug.h"
79 #include "ihevcd_profile.h"
80 #include "ihevcd_statistics.h"
81 #include "ihevcd_itrans_recon_dc.h"
82
83 static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
84
85
86 /* Globals */
87 static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
88 { IP_FUNC_MODE_0, /* Mode 0 */
89 IP_FUNC_MODE_1, /* Mode 1 */
90 IP_FUNC_MODE_2, /* Mode 2 */
91 IP_FUNC_MODE_3TO9, /* Mode 3 */
92 IP_FUNC_MODE_3TO9, /* Mode 4 */
93 IP_FUNC_MODE_3TO9, /* Mode 5 */
94 IP_FUNC_MODE_3TO9, /* Mode 6 */
95 IP_FUNC_MODE_3TO9, /* Mode 7 */
96 IP_FUNC_MODE_3TO9, /* Mode 8 */
97 IP_FUNC_MODE_3TO9, /* Mode 9 */
98 IP_FUNC_MODE_10, /* Mode 10 */
99 IP_FUNC_MODE_11TO17, /* Mode 11 */
100 IP_FUNC_MODE_11TO17, /* Mode 12 */
101 IP_FUNC_MODE_11TO17, /* Mode 13 */
102 IP_FUNC_MODE_11TO17, /* Mode 14 */
103 IP_FUNC_MODE_11TO17, /* Mode 15 */
104 IP_FUNC_MODE_11TO17, /* Mode 16 */
105 IP_FUNC_MODE_11TO17, /* Mode 17 */
106 IP_FUNC_MODE_18_34, /* Mode 18 */
107 IP_FUNC_MODE_19TO25, /* Mode 19 */
108 IP_FUNC_MODE_19TO25, /* Mode 20 */
109 IP_FUNC_MODE_19TO25, /* Mode 21 */
110 IP_FUNC_MODE_19TO25, /* Mode 22 */
111 IP_FUNC_MODE_19TO25, /* Mode 23 */
112 IP_FUNC_MODE_19TO25, /* Mode 24 */
113 IP_FUNC_MODE_19TO25, /* Mode 25 */
114 IP_FUNC_MODE_26, /* Mode 26 */
115 IP_FUNC_MODE_27TO33, /* Mode 27 */
116 IP_FUNC_MODE_27TO33, /* Mode 26 */
117 IP_FUNC_MODE_27TO33, /* Mode 29 */
118 IP_FUNC_MODE_27TO33, /* Mode 30 */
119 IP_FUNC_MODE_27TO33, /* Mode 31 */
120 IP_FUNC_MODE_27TO33, /* Mode 32 */
121 IP_FUNC_MODE_27TO33, /* Mode 33 */
122 IP_FUNC_MODE_18_34, /* Mode 34 */
123 };
124
125
126 const WORD16 *g_ai2_ihevc_trans_tables[] =
127 { &g_ai2_ihevc_trans_dst_4[0][0],
128 &g_ai2_ihevc_trans_4[0][0],
129 &g_ai2_ihevc_trans_8[0][0],
130 &g_ai2_ihevc_trans_16[0][0],
131 &g_ai2_ihevc_trans_32[0][0]
132 };
133
134
135 /*****************************************************************************/
136 /* Function Prototypes */
137 /*****************************************************************************/
138 /* Returns number of ai2_level read from ps_sblk_coeff */
ihevcd_unpack_coeffs(WORD16 * pi2_tu_coeff,WORD32 log2_trans_size,UWORD8 * pu1_tu_coeff_data,WORD16 * pi2_dequant_matrix,WORD32 qp_rem,WORD32 qp_div,TRANSFORM_TYPE e_trans_type,WORD32 trans_quant_bypass,UWORD32 * pu4_zero_cols,UWORD32 * pu4_zero_rows,UWORD32 * pu4_coeff_type,WORD16 * pi2_coeff_value)139 UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
140 WORD32 log2_trans_size,
141 UWORD8 *pu1_tu_coeff_data,
142 WORD16 *pi2_dequant_matrix,
143 WORD32 qp_rem,
144 WORD32 qp_div,
145 TRANSFORM_TYPE e_trans_type,
146 WORD32 trans_quant_bypass,
147 UWORD32 *pu4_zero_cols,
148 UWORD32 *pu4_zero_rows,
149 UWORD32 *pu4_coeff_type,
150 WORD16 *pi2_coeff_value)
151 {
152 /* Generating coeffs from coeff-map */
153 WORD32 i;
154 WORD16 *pi2_sblk_ptr;
155 WORD32 subblk_pos_x, subblk_pos_y;
156 WORD32 sblk_scan_idx, coeff_raster_idx;
157 WORD32 sblk_non_zero_coeff_idx;
158 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
159 UWORD8 u1_num_coded_sblks, u1_scan_type;
160 UWORD8 *pu1_new_tu_coeff_data;
161 WORD32 trans_size;
162 WORD32 xs, ys;
163 WORD32 trans_skip;
164 WORD16 iquant_out;
165 WORD32 shift_iq;
166 {
167 WORD32 bit_depth;
168
169 bit_depth = 8 + 0;
170 shift_iq = bit_depth + log2_trans_size - 5;
171 }
172 trans_size = (1 << log2_trans_size);
173
174 /* First byte points to number of coded blocks */
175 u1_num_coded_sblks = *pu1_tu_coeff_data++;
176
177 /* Next byte points to scan type */
178 u1_scan_type = *pu1_tu_coeff_data++;
179 /* 0th bit has trans_skip */
180 trans_skip = u1_scan_type & 1;
181 u1_scan_type >>= 1;
182
183 pi2_sblk_ptr = pi2_tu_coeff;
184
185 /* Initially all columns are assumed to be zero */
186 *pu4_zero_cols = 0xFFFFFFFF;
187 /* Initially all rows are assumed to be zero */
188 *pu4_zero_rows = 0xFFFFFFFF;
189
190 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);
191
192 if(trans_skip)
193 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
194
195 STATS_INIT_SBLK_AND_COEFF_POS();
196
197 /* DC only case */
198 if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
199 && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
200 && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
201 {
202 *pu4_coeff_type = 1;
203
204 if(!trans_quant_bypass)
205 {
206 if(4 == trans_size)
207 {
208 IQUANT_4x4(iquant_out,
209 ps_tu_sblk_coeff_data->ai2_level[0],
210 pi2_dequant_matrix[0]
211 * g_ihevc_iquant_scales[qp_rem],
212 shift_iq, qp_div);
213 }
214 else
215 {
216 IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
217 pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
218 shift_iq, qp_div);
219 }
220 if(trans_skip)
221 iquant_out = (iquant_out + 16) >> 5;
222 }
223 else
224 {
225 /* setting the column to zero */
226 for(i = 0; i < trans_size; i++)
227 *(pi2_tu_coeff + i * trans_size) = 0;
228
229 iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
230 }
231 *pi2_coeff_value = iquant_out;
232 *pi2_tu_coeff = iquant_out;
233 *pu4_zero_cols &= ~0x1;
234 *pu4_zero_rows &= ~0x1;
235 ps_tu_sblk_coeff_data =
236 (void *)&ps_tu_sblk_coeff_data->ai2_level[1];
237
238 STATS_UPDATE_COEFF_COUNT();
239 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0);
240 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
241 return ((UWORD8 *)ps_tu_sblk_coeff_data);
242 }
243 else
244 {
245 *pu4_coeff_type = 0;
246 /* In case of trans skip, memset has already happened */
247 if(!trans_skip)
248 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
249 }
250
251 for(i = 0; i < u1_num_coded_sblks; i++)
252 {
253 UWORD32 u4_sig_coeff_map;
254 subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
255 subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;
256
257 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);
258
259 subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
260 subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;
261
262 pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
263 + subblk_pos_x;
264
265 //*pu4_zero_cols &= ~(0xF << subblk_pos_x);
266
267 sblk_non_zero_coeff_idx = 0;
268 u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;
269 //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--)
270 sblk_scan_idx = 31;
271 do
272 {
273 WORD32 clz = CLZ(u4_sig_coeff_map);
274
275 sblk_scan_idx -= clz;
276 /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */
277 /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */
278 u4_sig_coeff_map = u4_sig_coeff_map << clz;
279 /* Copying coeffs and storing in reverse order */
280 {
281 STATS_UPDATE_COEFF_COUNT();
282 coeff_raster_idx =
283 gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];
284
285 xs = coeff_raster_idx & 0x3;
286 ys = coeff_raster_idx >> 2;
287
288 if(!trans_quant_bypass)
289 {
290 if(4 == trans_size)
291 {
292 IQUANT_4x4(iquant_out,
293 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
294 pi2_dequant_matrix[(subblk_pos_x + xs)
295 + (subblk_pos_y + ys)
296 * trans_size]
297 * g_ihevc_iquant_scales[qp_rem],
298 shift_iq, qp_div);
299 sblk_non_zero_coeff_idx++;
300 }
301 else
302 {
303 IQUANT(iquant_out,
304 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
305 pi2_dequant_matrix[(subblk_pos_x + xs)
306 + (subblk_pos_y + ys)
307 * trans_size]
308 * g_ihevc_iquant_scales[qp_rem],
309 shift_iq, qp_div);
310 sblk_non_zero_coeff_idx++;
311 }
312
313 if(trans_skip)
314 iquant_out = (iquant_out + 16) >> 5;
315 }
316 else
317 {
318 iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
319 }
320 *pu4_zero_cols &= ~(0x1 << (subblk_pos_x + xs));
321 *pu4_zero_rows &= ~(0x1 << (subblk_pos_y + ys));
322 *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
323 }
324 sblk_scan_idx--;
325 u4_sig_coeff_map <<= 1;
326
327 }while(u4_sig_coeff_map);
328 /* Updating the sblk pointer */
329 ps_tu_sblk_coeff_data =
330 (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
331 }
332
333 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
334
335 pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;
336
337 return pu1_new_tu_coeff_data;
338 }
339
/* Reads the picture level intra flag map along a vertical run. The map is   */
/* addressed with one bit per 8x8 block, eight blocks per byte and map_strd  */
/* bytes per row of 8x8 blocks. Bit i of the result is the flag of the block */
/* containing (x, y + 8 * i), for i in 0 .. num_blks - 1                     */
static UWORD8 ihevcd_intra_flags_vert(const UWORD8 *pu1_intra_map,
                                      WORD32 map_strd,
                                      WORD32 x,
                                      WORD32 y,
                                      WORD32 num_blks)
{
    const UWORD8 *pu1_flag = pu1_intra_map + y / 8 * map_strd + x / 64;
    WORD32 bit_pos = ((x / 8) % 8);
    UWORD8 u1_flags = 0;
    WORD32 blk;

    for(blk = 0; blk < num_blks; blk++)
    {
        u1_flags |= ((*(pu1_flag + blk * map_strd) >> bit_pos) & 1) << blk;
    }

    return u1_flags;
}

/* Returns the intra flag map byte that covers (x, y), shifted right so that */
/* the flag of the 8x8 block containing (x, y) lands in bit 0                */
static UWORD8 ihevcd_intra_flags_horz(const UWORD8 *pu1_intra_map,
                                      WORD32 map_strd,
                                      WORD32 x,
                                      WORD32 y)
{
    const UWORD8 *pu1_flag = pu1_intra_map + y / 8 * map_strd + x / 64;
    WORD32 bit_pos = ((x / 8) % 8);

    return (UWORD8)(*pu1_flag >> bit_pos);
}

/**
*******************************************************************************
*
* @brief
*  Builds the neighbour availability flags used for intra prediction of one TU
*
* @par Description:
*  Availability of the five neighbours is first read from the CTB level map
*  pu4_intra_nbr_avail (one word per row of MIN_TU_SIZE units, with the
*  left column neighbour in the MSB). If constrained intra prediction is
*  enabled, every available neighbour is refined with the picture level intra
*  flag map, else it gets all of its flag bits set. Top-right and bottom-left
*  are then masked against what remains of the picture to the right of and
*  below the TU.
*
* @param[in] ps_proc
*  Process context; supplies the SPS, the CTB position and the intra flag map
*
* @param[in] ps_tu
*  Current TU; supplies position within the CTB and size
*
* @param[in] pu4_intra_nbr_avail
*  CTB level neighbour availability map
*
* @param[in] i2_pic_width_in_luma_samples
*  Picture width, used to derive the row stride of the intra flag map
*
* @param[in] i1_constrained_intra_pred_flag
*  Non-zero when constrained intra prediction is enabled
*
* @param[in] trans_size
*  Size of the TU in luma samples
*
* @param[in] ctb_size
*  Size of the CTB in luma samples
*
* @returns
*  Flags packed from MSB to LSB as
*  Top-Left (1 bit) | Top-Right (4) | Top (4) | Left (4) | Bottom-Left (4),
*  with the Left and Bottom-Left nibbles bit reversed
*
*******************************************************************************
*/
WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
                                 tu_t *ps_tu,
                                 UWORD32 *pu4_intra_nbr_avail,
                                 WORD16 i2_pic_width_in_luma_samples,
                                 UWORD8 i1_constrained_intra_pred_flag,
                                 WORD32 trans_size,
                                 WORD32 ctb_size)
{
    sps_t *ps_sps = ps_proc->ps_sps;
    UWORD8 *pu1_intra_map = ps_proc->pu1_pic_intra_flag;
    /* Bytes per row of the intra flag map: one byte covers 64 luma columns */
    WORD32 map_strd = (i2_pic_width_in_luma_samples + 63) / 64;

    /* TU position within the CTB and TU width, in MIN_TU_SIZE units */
    WORD32 tu_col = ps_tu->b4_pos_x;
    WORD32 tu_row = ps_tu->b4_pos_y;
    WORD32 tu_span = trans_size / MIN_TU_SIZE;

    /* TU position within the picture, in luma samples */
    WORD32 pix_x = ps_proc->i4_ctb_x * ctb_size + tu_col * MIN_TU_SIZE;
    WORD32 pix_y = ps_proc->i4_ctb_y * ctb_size + tu_row * MIN_TU_SIZE;

    /* Number of 8x8 blocks along one TU edge (at least one) and matching mask */
    WORD32 blks_8x8 = (trans_size > 4) ? (trans_size / 8) : 1;
    WORD32 blks_8x8_mask = ((1 << blks_8x8) - 1);

    /* Map entry 0 is the row above the CTB, so entry tu_row is the row above the TU */
    UWORD32 u4_row_above = pu4_intra_nbr_avail[tu_row];
    UWORD8 u1_avail_bl = (pu4_intra_nbr_avail[1 + tu_row + tu_span] >> (31 - tu_col)) & 1;
    UWORD8 u1_avail_l = (pu4_intra_nbr_avail[1 + tu_row] >> (31 - tu_col)) & 1;
    UWORD8 u1_avail_t = (u4_row_above >> (31 - (1 + tu_col))) & 1;
    UWORD8 u1_avail_tr = (u4_row_above >> (31 - (1 + tu_col + tu_span))) & 1;
    UWORD8 u1_avail_tl = (u4_row_above >> (31 - tu_col)) & 1;

    UWORD8 u1_flags_bl = 0;
    UWORD8 u1_flags_l = 0;
    UWORD8 u1_flags_t = 0;
    UWORD8 u1_flags_tr = 0;
    UWORD8 u1_flags_tl = 0;

    WORD32 nbr_flags;

    if(!i1_constrained_intra_pred_flag)
    {
        /* Every available neighbour is usable in full */
        if(u1_avail_bl)
            u1_flags_bl = 0xF;
        if(u1_avail_l)
            u1_flags_l = 0xF;
        if(u1_avail_t)
            u1_flags_t = 0xF;
        if(u1_avail_tr)
            u1_flags_tr = 0xF;
        if(u1_avail_tl)
            u1_flags_tl = 0x1;
    }
    else
    {
        /* TODO: constrained intra pred not tested */
        /* An available neighbour is usable only where the intra flag map marks it as intra */
        if(u1_avail_bl)
        {
            u1_flags_bl = ihevcd_intra_flags_vert(pu1_intra_map, map_strd,
                                                  pix_x - 1, pix_y + trans_size,
                                                  blks_8x8);
            u1_flags_bl &= blks_8x8_mask;
        }
        if(u1_avail_l)
        {
            u1_flags_l = ihevcd_intra_flags_vert(pu1_intra_map, map_strd,
                                                 pix_x - 1, pix_y,
                                                 blks_8x8);
            u1_flags_l &= blks_8x8_mask;
        }
        if(u1_avail_t)
        {
            u1_flags_t = ihevcd_intra_flags_horz(pu1_intra_map, map_strd,
                                                 pix_x, pix_y - 1);
            u1_flags_t &= blks_8x8_mask;
        }
        if(u1_avail_tr)
        {
            u1_flags_tr = ihevcd_intra_flags_horz(pu1_intra_map, map_strd,
                                                  pix_x + trans_size, pix_y - 1);
            u1_flags_tr &= blks_8x8_mask;
        }
        if(u1_avail_tl)
        {
            u1_flags_tl = ihevcd_intra_flags_horz(pu1_intra_map, map_strd,
                                                  pix_x - 1, pix_y - 1) & 1;
        }
    }

    /* Handling incomplete CTBs */
    {
        WORD32 flag_unit = MIN(trans_size, 8);
        WORD32 tu_size = (1 << (ps_tu->b3_size + 2));

        /* Luma columns of the picture left over to the right of this TU */
        WORD32 cols_left = ps_sps->i2_pic_width_in_luma_samples
                        - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)
                        - (ps_tu->b4_pos_x * MIN_TU_SIZE)
                        - tu_size;
        WORD32 valid_cols = MIN(ctb_size, cols_left);
        WORD32 valid_cols_mask = (1 << (valid_cols / flag_unit)) - 1;

        /* Luma rows of the picture left over below this TU */
        WORD32 rows_left = ps_sps->i2_pic_height_in_luma_samples
                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)
                        - (ps_tu->b4_pos_y * MIN_TU_SIZE)
                        - tu_size;
        WORD32 valid_rows = MIN(ctb_size, rows_left);
        WORD32 valid_rows_mask = (1 << (valid_rows / flag_unit)) - 1;

        u1_flags_tr &= valid_cols_mask;
        u1_flags_bl &= valid_rows_mask;
    }

    /* Top-Left | Top-Right | Top | Left | Bottom-Left
     *    1          4         4     4         4
     * Left and Bottom-Left go through the 4 bit reversal table
     */
    nbr_flags = (u1_flags_tl << 16)
                    | (u1_flags_tr << 12)
                    | (u1_flags_t << 8)
                    | (gau4_ihevcd_4_bit_reverse[u1_flags_l] << 4)
                    | gau4_ihevcd_4_bit_reverse[u1_flags_bl];

    return nbr_flags;
}
529
ihevcd_iquant_itrans_recon_ctb(process_ctxt_t * ps_proc)530 WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
531 {
532 WORD16 *pi2_scaling_mat;
533 UWORD8 *pu1_y_dst_ctb;
534 UWORD8 *pu1_uv_dst_ctb;
535 WORD32 ctb_size;
536 codec_t *ps_codec;
537 slice_header_t *ps_slice_hdr;
538 tu_t *ps_tu;
539 WORD16 *pi2_ctb_coeff;
540 WORD32 tu_cnt;
541 WORD16 *pi2_tu_coeff;
542 WORD16 *pi2_tmp;
543 WORD32 pic_strd;
544 WORD32 luma_nbr_flags;
545 WORD32 luma_nbr_flags_4x4[4] = { 0 };
546 WORD32 chroma_nbr_flags = 0;
547 UWORD8 u1_luma_pred_mode_first_tu = 0;
548 /* Pointers for generating 2d coeffs from coeff-map */
549 UWORD8 *pu1_tu_coeff_data;
550 /* nbr avail map for CTB */
551 /* 1st bit points to neighbor (left/top_left/bot_left) */
552 /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
553 UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE
554 + 2 /* Top nbr + bot nbr */]; UWORD32
555 top_avail_bits;
556 sps_t *ps_sps;
557 pps_t *ps_pps;
558 WORD32 intra_flag;
559 UWORD8 *pu1_pic_intra_flag;
560 /*************************************************************************/
561 /* Contains scaling matrix offset in the following order in a 1D buffer */
562 /* Entries that are listed as UNUSED are invalid combinations where */
563 /* scaling matrix is not used. eg: 64x64 SKIP CU, 64x64 PCM CU */
564 /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */
565 /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */
566 /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */
567 /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */
568 /* Intra 16x16 Y, 16x16 U, 16x16 V */
569 /* Inter 16x16 Y, 16x16 U, 16x16 V */
570 /* Intra 32x32 Y, UNUSED, UNUSED */
571 /* Inter 32x32 Y, UNUSED, UNUSED */
572 /* UNUSED, UNUSED, UNUSED */
573 /* UNUSED, UNUSED, UNUSED */
574 /*************************************************************************/
575 static const WORD32 scaling_mat_offset[] =
576 { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
577 1248, 1504, 1760, 2016, 0, 0, 3040, 0, 0, 0, 0, 0, 0, 0, 0};
578
579 PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
580
581 ps_sps = ps_proc->ps_sps;
582 ps_pps = ps_proc->ps_pps;
583 ps_slice_hdr = ps_proc->ps_slice_hdr;
584 ps_codec = ps_proc->ps_codec;
585
586 pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
587 pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
588
589 pi2_ctb_coeff = ps_proc->pi2_invscan_out;
590
591 ctb_size = (1 << ps_sps->i1_log2_ctb_size);
592 pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
593
594 pic_strd = ps_codec->i4_strd;
595
596 pi2_tmp = ps_proc->pi2_itrans_intrmd_buf;
597
598 pi2_tu_coeff = pi2_ctb_coeff;
599
600 ps_tu = ps_proc->ps_tu;
601
602 if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
603 {
604 pi2_scaling_mat = ps_pps->pi2_scaling_mat;
605 }
606 else
607 {
608 pi2_scaling_mat = ps_sps->pi2_scaling_mat;
609 }
610
611 {
612 /* Updating the initial availability map */
613 WORD32 i;
614 UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
615 u1_top_ctb_avail;
616
617 u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
618 u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
619 u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
620 u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
621
622 /* Initializing the availability array */
623 memset(au4_intra_nbr_avail, 0,
624 (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
625 /* Initializing the availability array with CTB level availability flags */
626 {
627 WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
628 WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
629 for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
630 {
631 au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
632 }
633 }
634 au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
635 >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
636
637 au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
638
639 {
640 WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
641 WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
642 WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
643
644 /* ctb_size_top gives number of valid pixels remaining in the current row */
645 /* Since we need pattern of 1's starting from the MSB, an additional shift */
646 /* is needed */
647 shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
648
649 top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
650 << shift;
651 }
652 au4_intra_nbr_avail[0] |= (
653 (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
654 /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
655
656 }
657
658 /* Applying Inverse transform on all the TU's in CTB */
659 for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
660 {
661 WORD32 transform_skip_flag = 0;
662 WORD32 transform_skip_flag_v = 0;
663 WORD32 num_comp, c_idx, func_idx;
664 WORD32 src_strd, pred_strd, dst_strd;
665 WORD32 qp_div = 0, qp_rem = 0;
666 WORD32 qp_div_v = 0, qp_rem_v = 0;
667 UWORD32 zero_cols = 0, zero_cols_v = 0;
668 UWORD32 zero_rows = 0, zero_rows_v = 0;
669 UWORD32 coeff_type = 0, coeff_type_v = 0;
670 WORD16 i2_coeff_value, i2_coeff_value_v;
671 WORD32 trans_size = 0;
672 TRANSFORM_TYPE e_trans_type;
673 WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
674 WORD32 log2_trans_size;
675 WORD32 chroma_qp_idx;
676 WORD16 *pi2_src = NULL, *pi2_src_v = NULL;
677 UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL;
678 UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL;
679 WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
680 WORD32 tu_x, tu_y;
681 WORD32 tu_y_offset, tu_uv_offset;
682 WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
683 UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode;
684 WORD32 offset;
685 WORD32 pcm_flag;
686 WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
687 /* If 420SP_VU is chroma format, pred and dst pointer */
688 /* will be added +1 to point to U */
689 WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
690 /* If 420SP_VU is chroma format, pred and dst pointer */
691 /* will be added U offset of +1 and subtracted 2 */
692 /* to point to V */
693 WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
694
695 tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
696 tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
697 {
698 WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
699 WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
700
701 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
702
703 pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
704 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
705 pu1_pic_intra_flag += (tu_abs_x >> 6);
706
707 intra_flag = *pu1_pic_intra_flag;
708 intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
709 }
710
711 u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
712 u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
713
714 if(u1_chroma_pred_mode != 7)
715 num_comp = 2; /* Y and UV */
716 else
717 num_comp = 1; /* Y */
718
719
720 pcm_flag = 0;
721
722 if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
723 {
724 UWORD8 *pu1_buf;
725 UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
726 UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
727 WORD32 i, j;
728 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
729 WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
730
731 /* trans_size is used to update availability after reconstruction */
732 trans_size = cb_size;
733
734 pcm_flag = 1;
735
736 tu_y_offset = tu_x + tu_y * pic_strd;
737 pu1_y_dst += tu_x + tu_y * pic_strd;
738 pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd;
739
740 /* First byte points to number of coded blocks */
741 pu1_tu_coeff_data++;
742
743 /* Next byte points to scan type */
744 pu1_tu_coeff_data++;
745
746 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
747
748 pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
749 {
750
751 for(i = 0; i < cb_size; i++)
752 {
753 //pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
754 memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
755 pu1_buf += cb_size;
756 }
757
758 pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
759
760 /* U */
761 for(i = 0; i < cb_size / 2; i++)
762 {
763 for(j = 0; j < cb_size / 2; j++)
764 {
765 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
766 }
767 }
768
769 pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
770
771 /* V */
772 for(i = 0; i < cb_size / 2; i++)
773 {
774 for(j = 0; j < cb_size / 2; j++)
775 {
776 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
777 }
778 }
779 }
780
781 pu1_tu_coeff_data = pu1_buf;
782
783 }
784
785
786
787
788
789 for(c_idx = 0; c_idx < num_comp; c_idx++)
790 {
791 if(0 == pcm_flag)
792 {
793 /* Initializing variables */
794 pred_strd = pic_strd;
795 dst_strd = pic_strd;
796
797 if(c_idx == 0) /* Y */
798 {
799 log2_y_trans_size_minus_2 = ps_tu->b3_size;
800 trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
801 log2_trans_size = log2_y_trans_size_minus_2 + 2;
802
803 tu_y_offset = tu_x + tu_y * pic_strd;
804
805 pi2_src = pi2_tu_coeff;
806 pu1_pred = pu1_y_dst_ctb + tu_y_offset;
807 pu1_dst = pu1_y_dst_ctb + tu_y_offset;
808
809 /* Calculating scaling matrix offset */
810 offset = log2_y_trans_size_minus_2 * 6
811 + (!intra_flag) * 3 + c_idx;
812 pi2_dequant_matrix = pi2_scaling_mat
813 + scaling_mat_offset[offset];
814
815 src_strd = trans_size;
816
817 /* 4x4 transform Luma in INTRA mode is DST */
818 if(log2_y_trans_size_minus_2 == 0 && intra_flag)
819 {
820 func_idx = log2_y_trans_size_minus_2;
821 e_trans_type = DST_4x4;
822 }
823 else
824 {
825 func_idx = log2_y_trans_size_minus_2 + 1;
826 e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
827 }
828
829 qp_div = ps_tu->b7_qp / 6;
830 qp_rem = ps_tu->b7_qp % 6;
831
832 u1_cbf = ps_tu->b1_y_cbf;
833
834 transform_skip_flag = pu1_tu_coeff_data[1] & 1;
835 /* Unpacking coeffs */
836 if(1 == u1_cbf)
837 {
838 pu1_tu_coeff_data = ihevcd_unpack_coeffs(
839 pi2_src, log2_y_trans_size_minus_2 + 2,
840 pu1_tu_coeff_data, pi2_dequant_matrix,
841 qp_rem, qp_div, e_trans_type,
842 ps_tu->b1_transquant_bypass, &zero_cols,
843 &zero_rows, &coeff_type,
844 &i2_coeff_value);
845 }
846 }
847 else /* UV interleaved */
848 {
849 /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
850 if(ps_tu->b3_size == 0)
851 {
852 /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x- 4,luma pos y- 4) */
853 log2_uv_trans_size_minus_2 = ps_tu->b3_size;
854 tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd;
855 }
856 else
857 {
858 log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
859 tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd;
860 }
861 trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
862 log2_trans_size = log2_uv_trans_size_minus_2 + 2;
863
864 pi2_src = pi2_tu_coeff;
865 pi2_src_v = pi2_tu_coeff + trans_size * trans_size;
866 pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
867 pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
868 pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
869 pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
870
871 /*TODO: Add support for choosing different tables for U and V,
872 * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
873 */
874 /* Calculating scaling matrix offset */
875 /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
876 * max uv trans size is 16x16
877 */
878 offset = log2_uv_trans_size_minus_2 * 6
879 + (!intra_flag) * 3 + c_idx;
880 pi2_dequant_matrix = pi2_scaling_mat
881 + scaling_mat_offset[offset];
882 pi2_dequant_matrix_v = pi2_scaling_mat
883 + scaling_mat_offset[offset + 1];
884
885 src_strd = trans_size;
886
887 func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
888
889 /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma.
890 * By limiting func_idx to 7, max of 16x16 chroma is called */
891 func_idx = MIN(func_idx, 7);
892
893 e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
894 /* QP for U */
895 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
896 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
897 u1_cbf = ps_tu->b1_cb_cbf;
898
899 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
900 + i1_chroma_slice_qp_offset;
901 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
902 qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
903 qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
904
905 /* QP for V */
906 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
907 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
908 u1_cbf_v = ps_tu->b1_cr_cbf;
909
910 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
911 + i1_chroma_slice_qp_offset;
912 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
913 qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
914 qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
915
916 /* Unpacking coeffs */
917 transform_skip_flag = pu1_tu_coeff_data[1] & 1;
918 if(1 == u1_cbf)
919 {
920 pu1_tu_coeff_data = ihevcd_unpack_coeffs(
921 pi2_src, log2_uv_trans_size_minus_2 + 2,
922 pu1_tu_coeff_data, pi2_dequant_matrix,
923 qp_rem, qp_div, e_trans_type,
924 ps_tu->b1_transquant_bypass, &zero_cols,
925 &zero_rows, &coeff_type,
926 &i2_coeff_value);
927 }
928
929 transform_skip_flag_v = pu1_tu_coeff_data[1] & 1;
930 if(1 == u1_cbf_v)
931 {
932 pu1_tu_coeff_data = ihevcd_unpack_coeffs(
933 pi2_src_v, log2_uv_trans_size_minus_2 + 2,
934 pu1_tu_coeff_data, pi2_dequant_matrix_v,
935 qp_rem_v, qp_div_v, e_trans_type,
936 ps_tu->b1_transquant_bypass, &zero_cols_v,
937 &zero_rows_v, &coeff_type_v, &i2_coeff_value_v);
938 }
939 }
940 /***************************************************************/
941 /****************** Intra Prediction **************************/
942 /***************************************************************/
943 if(intra_flag) /* Intra */
944 {
945 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
946 au1_ref_sub_out size is kept as multiple of 8,
947 so that SIMD functions can load 64 bits. Also some SIMD
948 modules read few bytes before the start of the array, so
949 allocate 16 extra bytes at the start */
950 UWORD8 au1_ref_sub_out[16 + (MAX_TU_SIZE * 2 * 2) + 8] = {0};
951 UWORD8 *pu1_ref_sub_out = &au1_ref_sub_out[16];
952 UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
953 WORD32 luma_pred_func_idx, chroma_pred_func_idx;
954
955 /* Get the neighbour availability flags */
956 /* Done for only Y */
957 if(c_idx == 0)
958 {
959 /* Get neighbor availability for Y only */
960 luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
961 ps_tu,
962 au4_intra_nbr_avail,
963 ps_sps->i2_pic_width_in_luma_samples,
964 ps_pps->i1_constrained_intra_pred_flag,
965 trans_size,
966 ctb_size);
967
968 if(trans_size == 4)
969 luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
970
971 if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))
972 {
973 chroma_nbr_flags = luma_nbr_flags;
974 }
975
976 /* Initializing nbr pointers */
977 pu1_top = pu1_pred - pic_strd;
978 pu1_left = pu1_pred - 1;
979 pu1_top_left = pu1_pred - pic_strd - 1;
980
981 /* call reference array substitution */
982 if(luma_nbr_flags == 0x1ffff)
983 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
984 pu1_top_left,
985 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1);
986 else
987 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
988 pu1_top_left,
989 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1);
990
991 /* call reference filtering */
992 ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
993 pu1_ref_sub_out, trans_size,
994 pu1_ref_sub_out,
995 u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag);
996
997 /* use the look up to get the function idx */
998 luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
999
1000 /* call the intra prediction function */
1001 ps_codec->apf_intra_pred_luma[luma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode);
1002 }
1003 else
1004 {
1005 /* In case of yuv420sp_vu, prediction happens as usual. */
1006 /* So point the pu1_pred pointer to original prediction pointer */
1007 UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset;
1008
1009 /* Top-Left | Top-Right | Top | Left | Bottom-Left
1010 * 1 4 4 4 4
1011 *
1012 * Generating chroma_nbr_flags depending upon the transform size */
1013 if(ps_tu->b3_size == 0)
1014 {
1015 /* Take TL,T,L flags of First luma 4x4 block */
1016 chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
1017 /* Take TR flags of Second luma 4x4 block */
1018 chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
1019 /* Take BL flags of Third luma 4x4 block */
1020 chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
1021 }
1022
1023 /* Initializing nbr pointers */
1024 pu1_top = pu1_pred_orig - pic_strd;
1025 pu1_left = pu1_pred_orig - 2;
1026 pu1_top_left = pu1_pred_orig - pic_strd - 2;
1027
1028 /* Chroma pred mode derivation from luma pred mode */
1029 {
1030 tu_t *ps_tu_tmp = ps_tu;
1031 while(!ps_tu_tmp->b1_first_tu_in_cu)
1032 {
1033 ps_tu_tmp--;
1034 }
1035 u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
1036 }
1037 if(4 == u1_chroma_pred_mode)
1038 u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
1039 else
1040 {
1041 u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
1042
1043 if(u1_chroma_pred_mode ==
1044 u1_luma_pred_mode_first_tu)
1045 {
1046 u1_chroma_pred_mode = INTRA_ANGULAR(34);
1047 }
1048 }
1049
1050 /* call the chroma reference array substitution */
1051 ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
1052 pu1_top_left,
1053 pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, pu1_ref_sub_out, 1);
1054
1055 /* use the look up to get the function idx */
1056 chroma_pred_func_idx =
1057 g_i4_ip_funcs[u1_chroma_pred_mode];
1058
1059 /* call the intra prediction function */
1060 ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode);
1061 }
1062 }
1063
1064 /* Updating number of transform types */
1065 STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
1066
1067 /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
1068 if(1 == u1_cbf)
1069 {
1070 if(ps_tu->b1_transquant_bypass || transform_skip_flag)
1071 {
1072 /* Recon */
1073 ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst,
1074 src_strd, pred_strd, dst_strd,
1075 zero_cols);
1076 }
1077 else
1078 {
1079
1080 /* Updating coded number of transform types(excluding trans skip and trans quant skip) */
1081 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1082
1083 /* iQuant , iTrans and Recon */
1084 if((0 == coeff_type))
1085 {
1086 ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp,
1087 pu1_pred, pu1_dst,
1088 src_strd, pred_strd,
1089 dst_strd, zero_cols,
1090 zero_rows);
1091 }
1092 else /* DC only */
1093 {
1094 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1095 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst,
1096 pred_strd, dst_strd,
1097 log2_trans_size,
1098 i2_coeff_value);
1099 }
1100 }
1101 }
1102 /* IQ, IT and Recon for V */
1103 if(c_idx != 0)
1104 {
1105 if(1 == u1_cbf_v)
1106 {
1107 if(ps_tu->b1_transquant_bypass || transform_skip_flag_v)
1108 {
1109 /* Recon */
1110 ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v,
1111 pu1_dst_v, src_strd,
1112 pred_strd, dst_strd,
1113 zero_cols_v);
1114 }
1115 else
1116 {
1117 /* Updating number of transform types */
1118 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1119
1120 /* iQuant , iTrans and Recon */
1121 if((0 == coeff_type_v))
1122 {
1123 ps_codec->apf_itrans_recon[func_idx](pi2_src_v,
1124 pi2_tmp,
1125 pu1_pred_v,
1126 pu1_dst_v,
1127 src_strd,
1128 pred_strd,
1129 dst_strd,
1130 zero_cols_v,
1131 zero_rows_v);
1132 }
1133 else /* DC only */
1134 {
1135 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1136 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v,
1137 pred_strd, dst_strd,
1138 log2_trans_size,
1139 i2_coeff_value_v);
1140 }
1141 }
1142 }
1143 }
1144 }
1145
1146 /* Neighbor availability inside CTB */
1147 /* 1bit per 4x4. Indicates whether that 4x4 block has been reconstructed (available) */
1148 /* Used for neighbor availability in intra pred */
1149 if(c_idx == 0)
1150 {
1151 WORD32 i;
1152 WORD32 trans_in_min_tu;
1153 UWORD32 cur_tu_in_bits;
1154 UWORD32 cur_tu_avail_flag;
1155
1156 trans_in_min_tu = trans_size / MIN_TU_SIZE;
1157 cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
1158 cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
1159
1160 cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
1161
1162 for(i = 0; i < trans_in_min_tu; i++)
1163 au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
1164 cur_tu_avail_flag;
1165 }
1166 }
1167 }
1168 ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
1169
1170 return ps_proc->i4_ctb_tu_cnt;
1171 }
1172
1173