1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar /**
21*c83a76b0SSuyog Pawar *******************************************************************************
22*c83a76b0SSuyog Pawar * @file
23*c83a76b0SSuyog Pawar * ihevce_deblk.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * @brief
26*c83a76b0SSuyog Pawar * Contains definition for the ctb level deblk function
27*c83a76b0SSuyog Pawar *
28*c83a76b0SSuyog Pawar * @author
29*c83a76b0SSuyog Pawar * ittiam
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar * @List of Functions:
32*c83a76b0SSuyog Pawar * ihevce_deblk_populate_qp_map()
33*c83a76b0SSuyog Pawar * ihevce_deblk_ctb()
34*c83a76b0SSuyog Pawar * ihevce_hbd_deblk_ctb()
35*c83a76b0SSuyog Pawar *
36*c83a76b0SSuyog Pawar * @remarks
37*c83a76b0SSuyog Pawar * None
38*c83a76b0SSuyog Pawar *
39*c83a76b0SSuyog Pawar *******************************************************************************
40*c83a76b0SSuyog Pawar */
41*c83a76b0SSuyog Pawar
42*c83a76b0SSuyog Pawar /*****************************************************************************/
43*c83a76b0SSuyog Pawar /* File Includes */
44*c83a76b0SSuyog Pawar /*****************************************************************************/
45*c83a76b0SSuyog Pawar /* System include files */
46*c83a76b0SSuyog Pawar #include <stdio.h>
47*c83a76b0SSuyog Pawar #include <string.h>
48*c83a76b0SSuyog Pawar #include <stdlib.h>
49*c83a76b0SSuyog Pawar #include <assert.h>
50*c83a76b0SSuyog Pawar #include <stdarg.h>
51*c83a76b0SSuyog Pawar #include <math.h>
52*c83a76b0SSuyog Pawar
53*c83a76b0SSuyog Pawar /* User include files */
54*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
55*c83a76b0SSuyog Pawar #include "itt_video_api.h"
56*c83a76b0SSuyog Pawar #include "ihevce_api.h"
57*c83a76b0SSuyog Pawar
58*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
59*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
60*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
61*c83a76b0SSuyog Pawar
62*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
64*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
65*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
66*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
67*c83a76b0SSuyog Pawar #include "ihevc_deblk_tables.h"
68*c83a76b0SSuyog Pawar #include "ihevc_common_tables.h"
69*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
70*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
71*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
72*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
73*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
74*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
75*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
76*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
77*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
78*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
79*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
80*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
81*c83a76b0SSuyog Pawar
82*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
83*c83a76b0SSuyog Pawar #include "ihevce_hle_interface.h"
84*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
85*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
86*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
87*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
88*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
89*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
90*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
91*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
92*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
93*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
94*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
95*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
96*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
97*c83a76b0SSuyog Pawar #include "ihevce_common_utils.h"
98*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
99*c83a76b0SSuyog Pawar #include "ihevce_deblk.h"
100*c83a76b0SSuyog Pawar #include "ihevce_tile_interface.h"
101*c83a76b0SSuyog Pawar
102*c83a76b0SSuyog Pawar /*****************************************************************************/
103*c83a76b0SSuyog Pawar /* Function Definitions */
104*c83a76b0SSuyog Pawar /*****************************************************************************/
105*c83a76b0SSuyog Pawar
106*c83a76b0SSuyog Pawar /*!
107*c83a76b0SSuyog Pawar ******************************************************************************
108*c83a76b0SSuyog Pawar * \if Function name : ihevce_deblk_populate_qp_map \endif
109*c83a76b0SSuyog Pawar *
110*c83a76b0SSuyog Pawar * \brief
111*c83a76b0SSuyog Pawar *
112*c83a76b0SSuyog Pawar *
113*c83a76b0SSuyog Pawar *****************************************************************************
114*c83a76b0SSuyog Pawar */
ihevce_deblk_populate_qp_map(ihevce_enc_loop_ctxt_t * ps_ctxt,deblk_ctbrow_prms_t * ps_deblk_ctb_row_params,ctb_enc_loop_out_t * ps_ctb_out_dblk,WORD32 vert_ctr,frm_ctb_ctxt_t * ps_frm_ctb_prms,ihevce_tile_params_t * ps_col_tile_params)115*c83a76b0SSuyog Pawar void ihevce_deblk_populate_qp_map(
116*c83a76b0SSuyog Pawar ihevce_enc_loop_ctxt_t *ps_ctxt,
117*c83a76b0SSuyog Pawar deblk_ctbrow_prms_t *ps_deblk_ctb_row_params,
118*c83a76b0SSuyog Pawar ctb_enc_loop_out_t *ps_ctb_out_dblk,
119*c83a76b0SSuyog Pawar WORD32 vert_ctr,
120*c83a76b0SSuyog Pawar frm_ctb_ctxt_t *ps_frm_ctb_prms,
121*c83a76b0SSuyog Pawar ihevce_tile_params_t *ps_col_tile_params)
122*c83a76b0SSuyog Pawar {
123*c83a76b0SSuyog Pawar ctb_enc_loop_out_t *ps_ctb_out;
124*c83a76b0SSuyog Pawar WORD32 ctb_ctr, ctb_start, ctb_end;
125*c83a76b0SSuyog Pawar WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp;
126*c83a76b0SSuyog Pawar /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/
127*c83a76b0SSuyog Pawar /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/
128*c83a76b0SSuyog Pawar /*Qp of the last CU of previous CTB row*/
129*c83a76b0SSuyog Pawar WORD8 i1_last_cu_qp;
130*c83a76b0SSuyog Pawar /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/
131*c83a76b0SSuyog Pawar WORD8 *pi1_qp_top_4x4_ctb_row =
132*c83a76b0SSuyog Pawar ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] +
133*c83a76b0SSuyog Pawar (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num);
134*c83a76b0SSuyog Pawar
135*c83a76b0SSuyog Pawar UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd;
136*c83a76b0SSuyog Pawar
137*c83a76b0SSuyog Pawar /*The Qp map which has to be populated*/
138*c83a76b0SSuyog Pawar UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
139*c83a76b0SSuyog Pawar WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
140*c83a76b0SSuyog Pawar
141*c83a76b0SSuyog Pawar /*Temporary pointers to Qp map at CTB level*/
142*c83a76b0SSuyog Pawar WORD8 *pi1_ctb_qp_map_tile;
143*c83a76b0SSuyog Pawar
144*c83a76b0SSuyog Pawar i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx];
145*c83a76b0SSuyog Pawar /* total QPs to be copied for current row is : */
146*c83a76b0SSuyog Pawar tile_qp_size = i4_offset_for_last_cu_qp + 1;
147*c83a76b0SSuyog Pawar /*Pointing to the first CTB of current CTB row*/
148*c83a76b0SSuyog Pawar ps_ctb_out = ps_ctb_out_dblk;
149*c83a76b0SSuyog Pawar /* Offset req. for the row QP to the tile start */
150*c83a76b0SSuyog Pawar tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4);
151*c83a76b0SSuyog Pawar
152*c83a76b0SSuyog Pawar ctb_start = ps_col_tile_params->i4_first_ctb_x;
153*c83a76b0SSuyog Pawar ctb_end =
154*c83a76b0SSuyog Pawar (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit);
155*c83a76b0SSuyog Pawar
156*c83a76b0SSuyog Pawar if(vert_ctr) /*Not first CTB row of frame*/
157*c83a76b0SSuyog Pawar {
158*c83a76b0SSuyog Pawar /*copy from top4x4_array data stored by upper CTB-row to qp-map*/
159*c83a76b0SSuyog Pawar memcpy(
160*c83a76b0SSuyog Pawar pi1_ctb_tile_qp,
161*c83a76b0SSuyog Pawar (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset),
162*c83a76b0SSuyog Pawar tile_qp_size);
163*c83a76b0SSuyog Pawar }
164*c83a76b0SSuyog Pawar
165*c83a76b0SSuyog Pawar /*pu1_ctb_row_qp points to top4x4 row in Qp-map.
166*c83a76b0SSuyog Pawar Now pointing pu1_ctb_qp_map to cur 4x4 row*/
167*c83a76b0SSuyog Pawar pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride;
168*c83a76b0SSuyog Pawar
169*c83a76b0SSuyog Pawar /* This i1_last_cu_qp will be conditionally overwritten later */
170*c83a76b0SSuyog Pawar i1_last_cu_qp = ps_ctxt->i4_frame_qp;
171*c83a76b0SSuyog Pawar
172*c83a76b0SSuyog Pawar /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */
173*c83a76b0SSuyog Pawar for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
174*c83a76b0SSuyog Pawar {
175*c83a76b0SSuyog Pawar WORD32 cu_ctr;
176*c83a76b0SSuyog Pawar cu_enc_loop_out_t *ps_curr_cu;
177*c83a76b0SSuyog Pawar
178*c83a76b0SSuyog Pawar /* Update i1_last_cu_qp based on CTB's position in tile */
179*c83a76b0SSuyog Pawar update_last_coded_cu_qp(
180*c83a76b0SSuyog Pawar (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp),
181*c83a76b0SSuyog Pawar ps_ctxt->i1_entropy_coding_sync_enabled_flag,
182*c83a76b0SSuyog Pawar ps_frm_ctb_prms,
183*c83a76b0SSuyog Pawar ps_ctxt->i4_frame_qp,
184*c83a76b0SSuyog Pawar vert_ctr,
185*c83a76b0SSuyog Pawar ctb_ctr,
186*c83a76b0SSuyog Pawar &i1_last_cu_qp);
187*c83a76b0SSuyog Pawar
188*c83a76b0SSuyog Pawar /* store the pointer of first cu of current ctb */
189*c83a76b0SSuyog Pawar ps_curr_cu = ps_ctb_out->ps_enc_cu;
190*c83a76b0SSuyog Pawar
191*c83a76b0SSuyog Pawar /* --------- loop over all the CUs in the CTB --------------- */
192*c83a76b0SSuyog Pawar for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++)
193*c83a76b0SSuyog Pawar {
194*c83a76b0SSuyog Pawar UWORD8 u1_vert_4x4, u1_horz_4x4; //for_loop counters
195*c83a76b0SSuyog Pawar WORD8 *pi1_cu_qp_map;
196*c83a76b0SSuyog Pawar
197*c83a76b0SSuyog Pawar WORD8 i1_qp, i1_qp_left, i1_qp_top;
198*c83a76b0SSuyog Pawar
199*c83a76b0SSuyog Pawar pi1_cu_qp_map = pi1_ctb_qp_map_tile +
200*c83a76b0SSuyog Pawar (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride +
201*c83a76b0SSuyog Pawar (ps_curr_cu->b3_cu_pos_x * 2);
202*c83a76b0SSuyog Pawar
203*c83a76b0SSuyog Pawar /*If the current CU is coded in skip_mode/zero_CBF then
204*c83a76b0SSuyog Pawar for deblocking, Qp of the previously coded CU will be used*/
205*c83a76b0SSuyog Pawar if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag)
206*c83a76b0SSuyog Pawar {
207*c83a76b0SSuyog Pawar if(0 == ps_curr_cu->b3_cu_pos_x)
208*c83a76b0SSuyog Pawar i1_qp_left = i1_last_cu_qp;
209*c83a76b0SSuyog Pawar else
210*c83a76b0SSuyog Pawar i1_qp_left = *(pi1_cu_qp_map - 1);
211*c83a76b0SSuyog Pawar
212*c83a76b0SSuyog Pawar if(0 == ps_curr_cu->b3_cu_pos_y)
213*c83a76b0SSuyog Pawar i1_qp_top = i1_last_cu_qp;
214*c83a76b0SSuyog Pawar else
215*c83a76b0SSuyog Pawar i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride);
216*c83a76b0SSuyog Pawar
217*c83a76b0SSuyog Pawar i1_qp = (i1_qp_left + i1_qp_top + 1) / 2;
218*c83a76b0SSuyog Pawar
219*c83a76b0SSuyog Pawar if(0 == ps_curr_cu->b1_first_cu_in_qg)
220*c83a76b0SSuyog Pawar {
221*c83a76b0SSuyog Pawar i1_qp = i1_last_cu_qp;
222*c83a76b0SSuyog Pawar }
223*c83a76b0SSuyog Pawar }
224*c83a76b0SSuyog Pawar else
225*c83a76b0SSuyog Pawar {
226*c83a76b0SSuyog Pawar i1_qp = ps_curr_cu->i1_cu_qp;
227*c83a76b0SSuyog Pawar }
228*c83a76b0SSuyog Pawar
229*c83a76b0SSuyog Pawar i1_last_cu_qp = i1_qp;
230*c83a76b0SSuyog Pawar
231*c83a76b0SSuyog Pawar /*---- Loop for populating Qp map for the current CU -------*/
232*c83a76b0SSuyog Pawar for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++)
233*c83a76b0SSuyog Pawar {
234*c83a76b0SSuyog Pawar for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++)
235*c83a76b0SSuyog Pawar {
236*c83a76b0SSuyog Pawar pi1_cu_qp_map[u1_horz_4x4] = i1_qp;
237*c83a76b0SSuyog Pawar }
238*c83a76b0SSuyog Pawar pi1_cu_qp_map += u4_qp_buffer_stride;
239*c83a76b0SSuyog Pawar }
240*c83a76b0SSuyog Pawar /*Update Qp-map ptr. Qp map is at 4x4 level but b4_cu_size is at 8x8 level*/
241*c83a76b0SSuyog Pawar ps_curr_cu++;
242*c83a76b0SSuyog Pawar }
243*c83a76b0SSuyog Pawar pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4); //one qp per 4x4 block.
244*c83a76b0SSuyog Pawar ps_ctb_out++;
245*c83a76b0SSuyog Pawar
246*c83a76b0SSuyog Pawar } //for(ctb_ctr = 0; ctb_ctr < num_ctbs_horz; ctb_ctr++)
247*c83a76b0SSuyog Pawar
248*c83a76b0SSuyog Pawar /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/
249*c83a76b0SSuyog Pawar memcpy(
250*c83a76b0SSuyog Pawar (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset),
251*c83a76b0SSuyog Pawar (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride),
252*c83a76b0SSuyog Pawar tile_qp_size);
253*c83a76b0SSuyog Pawar }
254*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  CTB level deblocking function.
*
* @par Description:
*  For a given CTB, deblocking is done on both the vertical and the
*  horizontal edges. Both the luma and the chroma blocks are processed.
*
* @param[in,out] ps_deblk
*  Pointer to the deblock context
*
* @param[in] last_col
*  1 if the CTB is the last CTB of the current CTB row, 0 otherwise
*
* @param[in] ps_deblk_ctb_row_params
*  Deblock CTB row parameters
*
* @returns
*  None
*
* @remarks
*  None
*
*******************************************************************************
*/
ihevce_deblk_ctb(deblk_ctb_params_t * ps_deblk,WORD32 last_col,deblk_ctbrow_prms_t * ps_deblk_ctb_row_params)278*c83a76b0SSuyog Pawar void ihevce_deblk_ctb(
279*c83a76b0SSuyog Pawar deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params)
280*c83a76b0SSuyog Pawar {
281*c83a76b0SSuyog Pawar WORD32 ctb_size;
282*c83a76b0SSuyog Pawar UWORD32 u4_bs;
283*c83a76b0SSuyog Pawar WORD32 bs_lz; /*Leading zeros in boundary strength*/
284*c83a76b0SSuyog Pawar WORD32 qp_p, qp_q;
285*c83a76b0SSuyog Pawar UWORD8 *pu1_src;
286*c83a76b0SSuyog Pawar UWORD8 *pu1_src_uv;
287*c83a76b0SSuyog Pawar UWORD8 *pu1_curr_src;
288*c83a76b0SSuyog Pawar WORD32 col_size;
289*c83a76b0SSuyog Pawar WORD32 col, row, i4_edge_count;
290*c83a76b0SSuyog Pawar WORD32 num_columns_for_vert_filt;
291*c83a76b0SSuyog Pawar WORD32 num_blks_for_vert_filt;
292*c83a76b0SSuyog Pawar WORD32 num_rows_for_horz_filt;
293*c83a76b0SSuyog Pawar
294*c83a76b0SSuyog Pawar ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz;
295*c83a76b0SSuyog Pawar ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert;
296*c83a76b0SSuyog Pawar
297*c83a76b0SSuyog Pawar /* Filter flags are packed along with the qp info.
298*c83a76b0SSuyog Pawar 6 out of the 8 bits correspond to qp and 1 to filter flag. */
299*c83a76b0SSuyog Pawar /* filter_p and filter_q are initialized to 1.
300*c83a76b0SSuyog Pawar They are to be extracted along with the qp info. */
301*c83a76b0SSuyog Pawar WORD32 filter_p, filter_q;
302*c83a76b0SSuyog Pawar WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp;
303*c83a76b0SSuyog Pawar WORD8 *pi1_ctb_row_qp_q;
304*c83a76b0SSuyog Pawar
305*c83a76b0SSuyog Pawar func_selector_t *ps_func_slector = ps_deblk->ps_func_selector;
306*c83a76b0SSuyog Pawar
307*c83a76b0SSuyog Pawar WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
308*c83a76b0SSuyog Pawar WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
309*c83a76b0SSuyog Pawar WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
310*c83a76b0SSuyog Pawar WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
311*c83a76b0SSuyog Pawar UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert;
312*c83a76b0SSuyog Pawar UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz;
313*c83a76b0SSuyog Pawar UWORD32 *bs_vert_uv = bs_vert;
314*c83a76b0SSuyog Pawar UWORD32 *bs_horz_uv = bs_horz;
315*c83a76b0SSuyog Pawar UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
316*c83a76b0SSuyog Pawar UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2);
317*c83a76b0SSuyog Pawar
318*c83a76b0SSuyog Pawar if(u1_is_422)
319*c83a76b0SSuyog Pawar {
320*c83a76b0SSuyog Pawar pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr;
321*c83a76b0SSuyog Pawar pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr;
322*c83a76b0SSuyog Pawar }
323*c83a76b0SSuyog Pawar else
324*c83a76b0SSuyog Pawar {
325*c83a76b0SSuyog Pawar pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr;
326*c83a76b0SSuyog Pawar pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr;
327*c83a76b0SSuyog Pawar }
328*c83a76b0SSuyog Pawar
329*c83a76b0SSuyog Pawar ctb_size = ps_deblk->i4_ctb_size;
330*c83a76b0SSuyog Pawar
331*c83a76b0SSuyog Pawar /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */
332*c83a76b0SSuyog Pawar /* Can be removed during optimization */
333*c83a76b0SSuyog Pawar filter_q = 1;
334*c83a76b0SSuyog Pawar filter_p = 1;
335*c83a76b0SSuyog Pawar
336*c83a76b0SSuyog Pawar //////////////////////////////////////////////////////////////////////////////
337*c83a76b0SSuyog Pawar /* Luma Veritcal Edge */
338*c83a76b0SSuyog Pawar pu1_src = ps_deblk->pu1_ctb_y;
339*c83a76b0SSuyog Pawar pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
340*c83a76b0SSuyog Pawar num_columns_for_vert_filt = ctb_size / 8;
341*c83a76b0SSuyog Pawar num_blks_for_vert_filt = ctb_size / 4;
342*c83a76b0SSuyog Pawar
343*c83a76b0SSuyog Pawar for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
344*c83a76b0SSuyog Pawar {
345*c83a76b0SSuyog Pawar u4_bs = *bs_vert;
346*c83a76b0SSuyog Pawar /* get the current 4x4 vertical pointer */
347*c83a76b0SSuyog Pawar pu1_curr_src = pu1_src;
348*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1);
349*c83a76b0SSuyog Pawar
350*c83a76b0SSuyog Pawar /* If the current edge is not the 1st edge of frame or slice */
351*c83a76b0SSuyog Pawar if(1 == left_luma_edge_filter_flag)
352*c83a76b0SSuyog Pawar {
353*c83a76b0SSuyog Pawar for(row = 0; row < num_blks_for_vert_filt;)
354*c83a76b0SSuyog Pawar {
355*c83a76b0SSuyog Pawar bs_lz = CLZ(u4_bs) >> 1;
356*c83a76b0SSuyog Pawar /* If BS = 0, skip the egde filtering */
357*c83a76b0SSuyog Pawar if(0 != bs_lz)
358*c83a76b0SSuyog Pawar {
359*c83a76b0SSuyog Pawar u4_bs = u4_bs << (bs_lz << 1);
360*c83a76b0SSuyog Pawar pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride);
361*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride);
362*c83a76b0SSuyog Pawar row += bs_lz;
363*c83a76b0SSuyog Pawar continue;
364*c83a76b0SSuyog Pawar }
365*c83a76b0SSuyog Pawar qp_p = *(pi1_ctb_row_qp_q - 1);
366*c83a76b0SSuyog Pawar qp_q = *pi1_ctb_row_qp_q;
367*c83a76b0SSuyog Pawar
368*c83a76b0SSuyog Pawar ps_func_slector->ihevc_deblk_luma_vert_fptr(
369*c83a76b0SSuyog Pawar pu1_curr_src,
370*c83a76b0SSuyog Pawar ps_deblk->i4_luma_pic_stride,
371*c83a76b0SSuyog Pawar (u4_bs >> 30), /* bits 31 and 30 are extracted */
372*c83a76b0SSuyog Pawar qp_p,
373*c83a76b0SSuyog Pawar qp_q,
374*c83a76b0SSuyog Pawar ps_deblk->i4_beta_offset_div2,
375*c83a76b0SSuyog Pawar ps_deblk->i4_tc_offset_div2,
376*c83a76b0SSuyog Pawar filter_p,
377*c83a76b0SSuyog Pawar filter_q);
378*c83a76b0SSuyog Pawar
379*c83a76b0SSuyog Pawar u4_bs = u4_bs << 2;
380*c83a76b0SSuyog Pawar pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2);
381*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q += u4_qp_buffer_stride;
382*c83a76b0SSuyog Pawar row++;
383*c83a76b0SSuyog Pawar }
384*c83a76b0SSuyog Pawar }
385*c83a76b0SSuyog Pawar
386*c83a76b0SSuyog Pawar /* Increment the boundary strength and src pointer for the next column */
387*c83a76b0SSuyog Pawar bs_vert += 1;
388*c83a76b0SSuyog Pawar pu1_src += 8;
389*c83a76b0SSuyog Pawar
390*c83a76b0SSuyog Pawar /* Enable for the next edges of ctb*/
391*c83a76b0SSuyog Pawar left_luma_edge_filter_flag = 1;
392*c83a76b0SSuyog Pawar }
393*c83a76b0SSuyog Pawar
394*c83a76b0SSuyog Pawar //////////////////////////////////////////////////////////////////////////////
395*c83a76b0SSuyog Pawar /* Chroma Veritcal Edge */
396*c83a76b0SSuyog Pawar pu1_src_uv = ps_deblk->pu1_ctb_uv;
397*c83a76b0SSuyog Pawar pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
398*c83a76b0SSuyog Pawar
399*c83a76b0SSuyog Pawar /* Column spacing is 4 for each chroma component */
400*c83a76b0SSuyog Pawar /* and hence 8 when they are interleaved. */
401*c83a76b0SSuyog Pawar /* But, only those columns with a x co-ordinate */
402*c83a76b0SSuyog Pawar /* that is divisiblee by 8 are filtered */
403*c83a76b0SSuyog Pawar /* Hence, denominator is 16 */
404*c83a76b0SSuyog Pawar num_columns_for_vert_filt = ctb_size / 16;
405*c83a76b0SSuyog Pawar /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */
406*c83a76b0SSuyog Pawar num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4;
407*c83a76b0SSuyog Pawar
408*c83a76b0SSuyog Pawar for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
409*c83a76b0SSuyog Pawar {
410*c83a76b0SSuyog Pawar /* Every alternate boundary strength value is used for 420 chroma */
411*c83a76b0SSuyog Pawar u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa);
412*c83a76b0SSuyog Pawar pu1_curr_src = pu1_src_uv;
413*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2);
414*c83a76b0SSuyog Pawar
415*c83a76b0SSuyog Pawar /* If the current edge is not the 1st edge of frame or slice */
416*c83a76b0SSuyog Pawar if(1 == left_chroma_edge_filter_flag)
417*c83a76b0SSuyog Pawar {
418*c83a76b0SSuyog Pawar /* Each 'bs' is 2 bits long */
419*c83a76b0SSuyog Pawar /* The divby4 in 420 is */
420*c83a76b0SSuyog Pawar /* necessitated by the fact that */
421*c83a76b0SSuyog Pawar /* chroma ctb_ht is half that of luma */
422*c83a76b0SSuyog Pawar WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1);
423*c83a76b0SSuyog Pawar /* i4_sub_heightC = 2 for 420 */
424*c83a76b0SSuyog Pawar /* i4_sub_heightC = 1 for 422 */
425*c83a76b0SSuyog Pawar WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs;
426*c83a76b0SSuyog Pawar
427*c83a76b0SSuyog Pawar for(row = 0; row < num_blks_for_vert_filt;)
428*c83a76b0SSuyog Pawar {
429*c83a76b0SSuyog Pawar bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs;
430*c83a76b0SSuyog Pawar
431*c83a76b0SSuyog Pawar /* If BS = 0, skip the egde filtering */
432*c83a76b0SSuyog Pawar if(0 != bs_lz)
433*c83a76b0SSuyog Pawar {
434*c83a76b0SSuyog Pawar row += bs_lz;
435*c83a76b0SSuyog Pawar u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs);
436*c83a76b0SSuyog Pawar /* '<<2' because of blk_size being 4x4 */
437*c83a76b0SSuyog Pawar pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride);
438*c83a76b0SSuyog Pawar
439*c83a76b0SSuyog Pawar /* In 420, every alternate QP row is skipped, because chroma height */
440*c83a76b0SSuyog Pawar /* In 422, no row is skipped */
441*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz);
442*c83a76b0SSuyog Pawar
443*c83a76b0SSuyog Pawar continue;
444*c83a76b0SSuyog Pawar }
445*c83a76b0SSuyog Pawar
446*c83a76b0SSuyog Pawar qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC);
447*c83a76b0SSuyog Pawar qp_q = *pi1_ctb_row_qp_q;
448*c83a76b0SSuyog Pawar
449*c83a76b0SSuyog Pawar pf_deblk_chroma_vert(
450*c83a76b0SSuyog Pawar pu1_curr_src,
451*c83a76b0SSuyog Pawar ps_deblk->i4_chroma_pic_stride,
452*c83a76b0SSuyog Pawar qp_p,
453*c83a76b0SSuyog Pawar qp_q,
454*c83a76b0SSuyog Pawar ps_deblk->i4_cb_qp_indx_offset,
455*c83a76b0SSuyog Pawar ps_deblk->i4_cr_qp_indx_offset,
456*c83a76b0SSuyog Pawar ps_deblk->i4_tc_offset_div2,
457*c83a76b0SSuyog Pawar filter_p,
458*c83a76b0SSuyog Pawar filter_q);
459*c83a76b0SSuyog Pawar
460*c83a76b0SSuyog Pawar u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs);
461*c83a76b0SSuyog Pawar pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2);
462*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1));
463*c83a76b0SSuyog Pawar row++;
464*c83a76b0SSuyog Pawar }
465*c83a76b0SSuyog Pawar }
466*c83a76b0SSuyog Pawar /* Increment the boundary strength by 2 and src pointer for the next column */
467*c83a76b0SSuyog Pawar /* As the edge filtering happens for alternate column */
468*c83a76b0SSuyog Pawar bs_vert_uv += 2;
469*c83a76b0SSuyog Pawar pu1_src_uv += 16;
470*c83a76b0SSuyog Pawar left_chroma_edge_filter_flag = 1;
471*c83a76b0SSuyog Pawar }
472*c83a76b0SSuyog Pawar
473*c83a76b0SSuyog Pawar //////////////////////////////////////////////////////////////////////////////
474*c83a76b0SSuyog Pawar
475*c83a76b0SSuyog Pawar /* Luma Horizontal Edge */
476*c83a76b0SSuyog Pawar pu1_src = ps_deblk->pu1_ctb_y;
477*c83a76b0SSuyog Pawar col_size = ctb_size / 4;
478*c83a76b0SSuyog Pawar
479*c83a76b0SSuyog Pawar /* If the ctb is the 1st ctb of row, */
480*c83a76b0SSuyog Pawar /* Decrement the loop count to exclude filtering of last 4 pixels */
481*c83a76b0SSuyog Pawar /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */
482*c83a76b0SSuyog Pawar if(ps_deblk->i4_deblock_left_ctb_edge == 1)
483*c83a76b0SSuyog Pawar {
484*c83a76b0SSuyog Pawar pu1_src -= 4;
485*c83a76b0SSuyog Pawar /*If the ctb is at the horizonatl end of PIC*/
486*c83a76b0SSuyog Pawar /* Increase the column size to filter last 4 pixels */
487*c83a76b0SSuyog Pawar col_size += last_col;
488*c83a76b0SSuyog Pawar }
489*c83a76b0SSuyog Pawar else if(!last_col)
490*c83a76b0SSuyog Pawar {
491*c83a76b0SSuyog Pawar col_size -= 1;
492*c83a76b0SSuyog Pawar }
493*c83a76b0SSuyog Pawar {
494*c83a76b0SSuyog Pawar UWORD8 *pu1_src_temp = pu1_src;
495*c83a76b0SSuyog Pawar //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows
496*c83a76b0SSuyog Pawar pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
497*c83a76b0SSuyog Pawar
498*c83a76b0SSuyog Pawar num_rows_for_horz_filt = ctb_size / 8;
499*c83a76b0SSuyog Pawar
500*c83a76b0SSuyog Pawar for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
501*c83a76b0SSuyog Pawar {
502*c83a76b0SSuyog Pawar WORD32 col_size_temp = col_size;
503*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
504*c83a76b0SSuyog Pawar pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride);
505*c83a76b0SSuyog Pawar
506*c83a76b0SSuyog Pawar if(1 == top_luma_edge_filter_flag)
507*c83a76b0SSuyog Pawar {
508*c83a76b0SSuyog Pawar //Deblock the last vertical_4x4_column of previous CTB
509*c83a76b0SSuyog Pawar if(ps_deblk->i4_deblock_left_ctb_edge == 1)
510*c83a76b0SSuyog Pawar {
511*c83a76b0SSuyog Pawar u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3;
512*c83a76b0SSuyog Pawar if(u4_bs != 0)
513*c83a76b0SSuyog Pawar {
514*c83a76b0SSuyog Pawar qp_p = *(pi1_ctb_row_qp_p - 1);
515*c83a76b0SSuyog Pawar qp_q = *(pi1_ctb_row_qp_q - 1);
516*c83a76b0SSuyog Pawar
517*c83a76b0SSuyog Pawar ps_func_slector->ihevc_deblk_luma_horz_fptr(
518*c83a76b0SSuyog Pawar pu1_src,
519*c83a76b0SSuyog Pawar ps_deblk->i4_luma_pic_stride,
520*c83a76b0SSuyog Pawar u4_bs,
521*c83a76b0SSuyog Pawar qp_p,
522*c83a76b0SSuyog Pawar qp_q,
523*c83a76b0SSuyog Pawar ps_deblk->i4_beta_offset_div2,
524*c83a76b0SSuyog Pawar ps_deblk->i4_tc_offset_div2,
525*c83a76b0SSuyog Pawar 1,
526*c83a76b0SSuyog Pawar 1);
527*c83a76b0SSuyog Pawar }
528*c83a76b0SSuyog Pawar
529*c83a76b0SSuyog Pawar pu1_src += 4;
530*c83a76b0SSuyog Pawar col_size_temp--;
531*c83a76b0SSuyog Pawar }
532*c83a76b0SSuyog Pawar //Start deblocking current CTB
533*c83a76b0SSuyog Pawar u4_bs = *(bs_horz);
534*c83a76b0SSuyog Pawar
535*c83a76b0SSuyog Pawar for(col = 0; col < col_size_temp;)
536*c83a76b0SSuyog Pawar {
537*c83a76b0SSuyog Pawar bs_lz = CLZ(u4_bs) >> 1;
538*c83a76b0SSuyog Pawar if(0 != bs_lz)
539*c83a76b0SSuyog Pawar {
540*c83a76b0SSuyog Pawar u4_bs = u4_bs << (bs_lz << 1);
541*c83a76b0SSuyog Pawar pu1_src += 4 * bs_lz;
542*c83a76b0SSuyog Pawar col += bs_lz;
543*c83a76b0SSuyog Pawar continue;
544*c83a76b0SSuyog Pawar }
545*c83a76b0SSuyog Pawar qp_p = *(pi1_ctb_row_qp_p + col);
546*c83a76b0SSuyog Pawar qp_q = *(pi1_ctb_row_qp_q + col);
547*c83a76b0SSuyog Pawar
548*c83a76b0SSuyog Pawar ps_func_slector->ihevc_deblk_luma_horz_fptr(
549*c83a76b0SSuyog Pawar pu1_src,
550*c83a76b0SSuyog Pawar ps_deblk->i4_luma_pic_stride,
551*c83a76b0SSuyog Pawar u4_bs >> (sizeof(u4_bs) * 8 - 2),
552*c83a76b0SSuyog Pawar qp_p,
553*c83a76b0SSuyog Pawar qp_q,
554*c83a76b0SSuyog Pawar ps_deblk->i4_beta_offset_div2,
555*c83a76b0SSuyog Pawar ps_deblk->i4_tc_offset_div2,
556*c83a76b0SSuyog Pawar filter_p,
557*c83a76b0SSuyog Pawar filter_q);
558*c83a76b0SSuyog Pawar
559*c83a76b0SSuyog Pawar pu1_src += 4;
560*c83a76b0SSuyog Pawar u4_bs = u4_bs << 2;
561*c83a76b0SSuyog Pawar col++;
562*c83a76b0SSuyog Pawar }
563*c83a76b0SSuyog Pawar //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
564*c83a76b0SSuyog Pawar u4_bs = *bs_horz;
565*c83a76b0SSuyog Pawar ps_deblk->au1_prev_bs[i4_edge_count] =
566*c83a76b0SSuyog Pawar (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30);
567*c83a76b0SSuyog Pawar }
568*c83a76b0SSuyog Pawar bs_horz += 1;
569*c83a76b0SSuyog Pawar pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1);
570*c83a76b0SSuyog Pawar top_luma_edge_filter_flag = 1;
571*c83a76b0SSuyog Pawar }
572*c83a76b0SSuyog Pawar }
573*c83a76b0SSuyog Pawar
574*c83a76b0SSuyog Pawar //////////////////////////////////////////////////////////////////////////////
575*c83a76b0SSuyog Pawar /* Chroma Horizontal Edge */
576*c83a76b0SSuyog Pawar pu1_src_uv = ps_deblk->pu1_ctb_uv;
577*c83a76b0SSuyog Pawar col_size = ctb_size / 8;
578*c83a76b0SSuyog Pawar
579*c83a76b0SSuyog Pawar /* If the ctb is the 1st ctb of row, */
580*c83a76b0SSuyog Pawar /* Decrement the loop count to exclude filtering of last 4 pixels */
581*c83a76b0SSuyog Pawar /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */
582*c83a76b0SSuyog Pawar if(ps_deblk->i4_deblock_left_ctb_edge == 1)
583*c83a76b0SSuyog Pawar {
584*c83a76b0SSuyog Pawar pu1_src_uv -= 8;
585*c83a76b0SSuyog Pawar
586*c83a76b0SSuyog Pawar /*If the ctb is at the horizontal end of PIC*/
587*c83a76b0SSuyog Pawar /* Increase the column size to filter last 8 (uv) pixels */
588*c83a76b0SSuyog Pawar col_size += last_col;
589*c83a76b0SSuyog Pawar }
590*c83a76b0SSuyog Pawar else if(!last_col)
591*c83a76b0SSuyog Pawar {
592*c83a76b0SSuyog Pawar col_size--;
593*c83a76b0SSuyog Pawar }
594*c83a76b0SSuyog Pawar
595*c83a76b0SSuyog Pawar {
596*c83a76b0SSuyog Pawar UWORD8 *pu1_src_temp = pu1_src_uv;
597*c83a76b0SSuyog Pawar
598*c83a76b0SSuyog Pawar //pi1_ctb_row_qp_p and pi1_ctb_row_qp_q point to alternate rows
599*c83a76b0SSuyog Pawar pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
600*c83a76b0SSuyog Pawar num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8);
601*c83a76b0SSuyog Pawar
602*c83a76b0SSuyog Pawar for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
603*c83a76b0SSuyog Pawar {
604*c83a76b0SSuyog Pawar WORD32 col_size_temp = col_size;
605*c83a76b0SSuyog Pawar
606*c83a76b0SSuyog Pawar pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
607*c83a76b0SSuyog Pawar pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride);
608*c83a76b0SSuyog Pawar
609*c83a76b0SSuyog Pawar if(1 == top_chroma_edge_filter_flag)
610*c83a76b0SSuyog Pawar {
611*c83a76b0SSuyog Pawar //Deblock the last vertical _4x4_column of previous CTB
612*c83a76b0SSuyog Pawar if(ps_deblk->i4_deblock_left_ctb_edge == 1)
613*c83a76b0SSuyog Pawar {
614*c83a76b0SSuyog Pawar u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2;
615*c83a76b0SSuyog Pawar
616*c83a76b0SSuyog Pawar if(u4_bs == 2)
617*c83a76b0SSuyog Pawar {
618*c83a76b0SSuyog Pawar qp_p = *(pi1_ctb_row_qp_p - 1);
619*c83a76b0SSuyog Pawar qp_q = *(pi1_ctb_row_qp_q - 1);
620*c83a76b0SSuyog Pawar
621*c83a76b0SSuyog Pawar pf_deblk_chroma_horz(
622*c83a76b0SSuyog Pawar pu1_src_uv,
623*c83a76b0SSuyog Pawar ps_deblk->i4_chroma_pic_stride,
624*c83a76b0SSuyog Pawar qp_p,
625*c83a76b0SSuyog Pawar qp_q,
626*c83a76b0SSuyog Pawar ps_deblk->i4_cb_qp_indx_offset,
627*c83a76b0SSuyog Pawar ps_deblk->i4_cr_qp_indx_offset,
628*c83a76b0SSuyog Pawar ps_deblk->i4_tc_offset_div2,
629*c83a76b0SSuyog Pawar 1,
630*c83a76b0SSuyog Pawar 1);
631*c83a76b0SSuyog Pawar }
632*c83a76b0SSuyog Pawar
633*c83a76b0SSuyog Pawar pu1_src_uv += 8;
634*c83a76b0SSuyog Pawar col_size_temp--;
635*c83a76b0SSuyog Pawar }
636*c83a76b0SSuyog Pawar
637*c83a76b0SSuyog Pawar //Start deblocking current CTB
638*c83a76b0SSuyog Pawar u4_bs = *(bs_horz_uv)&0x88888888;
639*c83a76b0SSuyog Pawar
640*c83a76b0SSuyog Pawar for(col = 0; col < col_size_temp;)
641*c83a76b0SSuyog Pawar {
642*c83a76b0SSuyog Pawar bs_lz = CLZ(u4_bs) >> 2;
643*c83a76b0SSuyog Pawar
644*c83a76b0SSuyog Pawar if(0 != bs_lz)
645*c83a76b0SSuyog Pawar {
646*c83a76b0SSuyog Pawar u4_bs = u4_bs << (bs_lz << 2);
647*c83a76b0SSuyog Pawar pu1_src_uv += (8 * bs_lz);
648*c83a76b0SSuyog Pawar
649*c83a76b0SSuyog Pawar col += bs_lz;
650*c83a76b0SSuyog Pawar continue;
651*c83a76b0SSuyog Pawar }
652*c83a76b0SSuyog Pawar
653*c83a76b0SSuyog Pawar qp_p = *(pi1_ctb_row_qp_p + (col << 1));
654*c83a76b0SSuyog Pawar qp_q = *(pi1_ctb_row_qp_q + (col << 1));
655*c83a76b0SSuyog Pawar
656*c83a76b0SSuyog Pawar pf_deblk_chroma_horz(
657*c83a76b0SSuyog Pawar pu1_src_uv,
658*c83a76b0SSuyog Pawar ps_deblk->i4_chroma_pic_stride,
659*c83a76b0SSuyog Pawar qp_p,
660*c83a76b0SSuyog Pawar qp_q,
661*c83a76b0SSuyog Pawar ps_deblk->i4_cb_qp_indx_offset,
662*c83a76b0SSuyog Pawar ps_deblk->i4_cr_qp_indx_offset,
663*c83a76b0SSuyog Pawar ps_deblk->i4_tc_offset_div2,
664*c83a76b0SSuyog Pawar filter_p,
665*c83a76b0SSuyog Pawar filter_q);
666*c83a76b0SSuyog Pawar
667*c83a76b0SSuyog Pawar pu1_src_uv += 8;
668*c83a76b0SSuyog Pawar u4_bs = u4_bs << 4;
669*c83a76b0SSuyog Pawar col++;
670*c83a76b0SSuyog Pawar }
671*c83a76b0SSuyog Pawar
672*c83a76b0SSuyog Pawar //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
673*c83a76b0SSuyog Pawar u4_bs = *bs_horz_uv;
674*c83a76b0SSuyog Pawar ps_deblk->au1_prev_bs_uv[i4_edge_count] =
675*c83a76b0SSuyog Pawar (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30);
676*c83a76b0SSuyog Pawar }
677*c83a76b0SSuyog Pawar
678*c83a76b0SSuyog Pawar bs_horz_uv += ((0 == u1_is_422) + 1);
679*c83a76b0SSuyog Pawar pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1));
680*c83a76b0SSuyog Pawar top_chroma_edge_filter_flag = 1;
681*c83a76b0SSuyog Pawar }
682*c83a76b0SSuyog Pawar }
683*c83a76b0SSuyog Pawar
684*c83a76b0SSuyog Pawar return;
685*c83a76b0SSuyog Pawar }
686