1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar *******************************************************************************
20*c83a76b0SSuyog Pawar * @file
21*c83a76b0SSuyog Pawar * ihevc_deblk.c
22*c83a76b0SSuyog Pawar *
23*c83a76b0SSuyog Pawar * @brief
24*c83a76b0SSuyog Pawar * Contains definition for the ctb level deblk function
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar * @author
27*c83a76b0SSuyog Pawar * Srinivas T
28*c83a76b0SSuyog Pawar *
29*c83a76b0SSuyog Pawar * @par List of Functions:
30*c83a76b0SSuyog Pawar * - ihevcd_deblk_ctb()
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * @remarks
33*c83a76b0SSuyog Pawar * None
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar *******************************************************************************
36*c83a76b0SSuyog Pawar */
37*c83a76b0SSuyog Pawar
38*c83a76b0SSuyog Pawar #include <stdio.h>
39*c83a76b0SSuyog Pawar #include <stddef.h>
40*c83a76b0SSuyog Pawar #include <stdlib.h>
41*c83a76b0SSuyog Pawar #include <string.h>
42*c83a76b0SSuyog Pawar #include <assert.h>
43*c83a76b0SSuyog Pawar
44*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
45*c83a76b0SSuyog Pawar #include "iv.h"
46*c83a76b0SSuyog Pawar #include "ivd.h"
47*c83a76b0SSuyog Pawar #include "ihevcd_cxa.h"
48*c83a76b0SSuyog Pawar #include "ithread.h"
49*c83a76b0SSuyog Pawar
50*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
51*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
52*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
53*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
54*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
55*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
56*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
57*c83a76b0SSuyog Pawar
58*c83a76b0SSuyog Pawar #include "ihevc_error.h"
59*c83a76b0SSuyog Pawar #include "ihevc_common_tables.h"
60*c83a76b0SSuyog Pawar
61*c83a76b0SSuyog Pawar #include "ihevcd_trace.h"
62*c83a76b0SSuyog Pawar #include "ihevcd_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevcd_function_selector.h"
64*c83a76b0SSuyog Pawar #include "ihevcd_structs.h"
65*c83a76b0SSuyog Pawar #include "ihevcd_error.h"
66*c83a76b0SSuyog Pawar #include "ihevcd_nal.h"
67*c83a76b0SSuyog Pawar #include "ihevcd_bitstream.h"
68*c83a76b0SSuyog Pawar #include "ihevcd_job_queue.h"
69*c83a76b0SSuyog Pawar #include "ihevcd_utils.h"
70*c83a76b0SSuyog Pawar #include "ihevcd_debug.h"
71*c83a76b0SSuyog Pawar
72*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
73*c83a76b0SSuyog Pawar #include "ihevc_deblk_tables.h"
74*c83a76b0SSuyog Pawar #include "ihevcd_profile.h"
75*c83a76b0SSuyog Pawar /**
76*c83a76b0SSuyog Pawar *******************************************************************************
77*c83a76b0SSuyog Pawar *
78*c83a76b0SSuyog Pawar * @brief
79*c83a76b0SSuyog Pawar * Deblock CTB level function.
80*c83a76b0SSuyog Pawar *
81*c83a76b0SSuyog Pawar * @par Description:
82*c83a76b0SSuyog Pawar * For a given CTB, deblocking on both vertical and
83*c83a76b0SSuyog Pawar * horizontal edges is done. Both the luma and chroma
84*c83a76b0SSuyog Pawar * blocks are processed
85*c83a76b0SSuyog Pawar *
86*c83a76b0SSuyog Pawar * @param[in] ps_deblk
87*c83a76b0SSuyog Pawar * Pointer to the deblock context
88*c83a76b0SSuyog Pawar *
89*c83a76b0SSuyog Pawar * @returns None
90*c83a76b0SSuyog Pawar *
91*c83a76b0SSuyog Pawar * @remarks
92*c83a76b0SSuyog Pawar * None
93*c83a76b0SSuyog Pawar *
94*c83a76b0SSuyog Pawar *******************************************************************************
95*c83a76b0SSuyog Pawar */
96*c83a76b0SSuyog Pawar
ihevcd_deblk_ctb(deblk_ctxt_t * ps_deblk,WORD32 i4_is_last_ctb_x,WORD32 i4_is_last_ctb_y)97*c83a76b0SSuyog Pawar void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98*c83a76b0SSuyog Pawar WORD32 i4_is_last_ctb_x,
99*c83a76b0SSuyog Pawar WORD32 i4_is_last_ctb_y)
100*c83a76b0SSuyog Pawar {
101*c83a76b0SSuyog Pawar WORD32 ctb_size;
102*c83a76b0SSuyog Pawar WORD32 log2_ctb_size;
103*c83a76b0SSuyog Pawar UWORD32 u4_bs;
104*c83a76b0SSuyog Pawar WORD32 bs_tz; /*Trailing zeros in boundary strength*/
105*c83a76b0SSuyog Pawar WORD32 qp_p, qp_q;
106*c83a76b0SSuyog Pawar
107*c83a76b0SSuyog Pawar WORD32 filter_p, filter_q;
108*c83a76b0SSuyog Pawar
109*c83a76b0SSuyog Pawar UWORD8 *pu1_src;
110*c83a76b0SSuyog Pawar WORD32 qp_strd;
111*c83a76b0SSuyog Pawar UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112*c83a76b0SSuyog Pawar UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113*c83a76b0SSuyog Pawar WORD32 bs_strd;
114*c83a76b0SSuyog Pawar WORD32 src_strd;
115*c83a76b0SSuyog Pawar UWORD8 *pu1_qp;
116*c83a76b0SSuyog Pawar UWORD16 *pu2_ctb_no_loop_filter_flag;
117*c83a76b0SSuyog Pawar UWORD16 au2_ctb_no_loop_filter_flag[9];
118*c83a76b0SSuyog Pawar
119*c83a76b0SSuyog Pawar WORD32 col, row;
120*c83a76b0SSuyog Pawar
121*c83a76b0SSuyog Pawar /* Flag to indicate if QP is constant in CTB
122*c83a76b0SSuyog Pawar * 0 - top_left, 1 - top, 2 - left, 3 - current */
123*c83a76b0SSuyog Pawar UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124*c83a76b0SSuyog Pawar WORD32 ctb_indx;
125*c83a76b0SSuyog Pawar WORD32 chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126*c83a76b0SSuyog Pawar sps_t *ps_sps;
127*c83a76b0SSuyog Pawar pps_t *ps_pps;
128*c83a76b0SSuyog Pawar codec_t *ps_codec;
129*c83a76b0SSuyog Pawar slice_header_t *ps_slice_hdr;
130*c83a76b0SSuyog Pawar
131*c83a76b0SSuyog Pawar PROFILE_DISABLE_DEBLK();
132*c83a76b0SSuyog Pawar
133*c83a76b0SSuyog Pawar ps_sps = ps_deblk->ps_sps;
134*c83a76b0SSuyog Pawar ps_pps = ps_deblk->ps_pps;
135*c83a76b0SSuyog Pawar ps_codec = ps_deblk->ps_codec;
136*c83a76b0SSuyog Pawar ps_slice_hdr = ps_deblk->ps_slice_hdr;
137*c83a76b0SSuyog Pawar
138*c83a76b0SSuyog Pawar log2_ctb_size = ps_sps->i1_log2_ctb_size;
139*c83a76b0SSuyog Pawar ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140*c83a76b0SSuyog Pawar
141*c83a76b0SSuyog Pawar /* strides are in units of number of bytes */
142*c83a76b0SSuyog Pawar /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143*c83a76b0SSuyog Pawar bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
144*c83a76b0SSuyog Pawar
145*c83a76b0SSuyog Pawar pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146*c83a76b0SSuyog Pawar (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147*c83a76b0SSuyog Pawar ps_deblk->i4_ctb_y * bs_strd);
148*c83a76b0SSuyog Pawar pu4_ctb_vert_bs = pu4_vert_bs;
149*c83a76b0SSuyog Pawar
150*c83a76b0SSuyog Pawar pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151*c83a76b0SSuyog Pawar (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152*c83a76b0SSuyog Pawar ps_deblk->i4_ctb_y * bs_strd);
153*c83a76b0SSuyog Pawar pu4_ctb_horz_bs = pu4_horz_bs;
154*c83a76b0SSuyog Pawar
155*c83a76b0SSuyog Pawar qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156*c83a76b0SSuyog Pawar pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157*c83a76b0SSuyog Pawar
158*c83a76b0SSuyog Pawar pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159*c83a76b0SSuyog Pawar
160*c83a76b0SSuyog Pawar ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161*c83a76b0SSuyog Pawar if(i4_is_last_ctb_y)
162*c83a76b0SSuyog Pawar {
163*c83a76b0SSuyog Pawar pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
164*c83a76b0SSuyog Pawar pu4_ctb_vert_bs = pu4_vert_bs;
165*c83a76b0SSuyog Pawar /* ctb_size/8 is the number of edges per CTB
166*c83a76b0SSuyog Pawar * ctb_size/4 is the number of BS values needed per edge
167*c83a76b0SSuyog Pawar * divided by 8 for the number of bytes
168*c83a76b0SSuyog Pawar * 2 is the number of bits needed for each BS value */
169*c83a76b0SSuyog Pawar memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170*c83a76b0SSuyog Pawar
171*c83a76b0SSuyog Pawar pu1_qp += (qp_strd << (log2_ctb_size - 3));
172*c83a76b0SSuyog Pawar pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173*c83a76b0SSuyog Pawar ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174*c83a76b0SSuyog Pawar }
175*c83a76b0SSuyog Pawar
176*c83a76b0SSuyog Pawar if(i4_is_last_ctb_x)
177*c83a76b0SSuyog Pawar {
178*c83a76b0SSuyog Pawar pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179*c83a76b0SSuyog Pawar pu4_ctb_horz_bs = pu4_horz_bs;
180*c83a76b0SSuyog Pawar memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181*c83a76b0SSuyog Pawar
182*c83a76b0SSuyog Pawar pu1_qp += (ctb_size >> 3);
183*c83a76b0SSuyog Pawar
184*c83a76b0SSuyog Pawar for(row = 0; row < (ctb_size >> 3) + 1; row++)
185*c83a76b0SSuyog Pawar au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186*c83a76b0SSuyog Pawar pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187*c83a76b0SSuyog Pawar ctb_indx += 1;
188*c83a76b0SSuyog Pawar }
189*c83a76b0SSuyog Pawar
190*c83a76b0SSuyog Pawar u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191*c83a76b0SSuyog Pawar
192*c83a76b0SSuyog Pawar if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193*c83a76b0SSuyog Pawar {
194*c83a76b0SSuyog Pawar u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195*c83a76b0SSuyog Pawar }
196*c83a76b0SSuyog Pawar
197*c83a76b0SSuyog Pawar if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198*c83a76b0SSuyog Pawar {
199*c83a76b0SSuyog Pawar u4_qp_const_in_ctb[0] =
200*c83a76b0SSuyog Pawar ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201*c83a76b0SSuyog Pawar (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202*c83a76b0SSuyog Pawar }
203*c83a76b0SSuyog Pawar
204*c83a76b0SSuyog Pawar
205*c83a76b0SSuyog Pawar
206*c83a76b0SSuyog Pawar if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207*c83a76b0SSuyog Pawar {
208*c83a76b0SSuyog Pawar u4_qp_const_in_ctb[1] =
209*c83a76b0SSuyog Pawar ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210*c83a76b0SSuyog Pawar (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211*c83a76b0SSuyog Pawar }
212*c83a76b0SSuyog Pawar
213*c83a76b0SSuyog Pawar src_strd = ps_codec->i4_strd;
214*c83a76b0SSuyog Pawar
215*c83a76b0SSuyog Pawar /* Luma Vertical Edge */
216*c83a76b0SSuyog Pawar
217*c83a76b0SSuyog Pawar if(0 == i4_is_last_ctb_x)
218*c83a76b0SSuyog Pawar {
219*c83a76b0SSuyog Pawar /* Top CTB's slice header */
220*c83a76b0SSuyog Pawar slice_header_t *ps_slice_hdr_top;
221*c83a76b0SSuyog Pawar {
222*c83a76b0SSuyog Pawar WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
223*c83a76b0SSuyog Pawar if(i4_is_last_ctb_y)
224*c83a76b0SSuyog Pawar cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
225*c83a76b0SSuyog Pawar ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
226*c83a76b0SSuyog Pawar }
227*c83a76b0SSuyog Pawar
228*c83a76b0SSuyog Pawar pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
229*c83a76b0SSuyog Pawar pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
230*c83a76b0SSuyog Pawar
231*c83a76b0SSuyog Pawar /** Deblocking is done on a shifted CTB -
232*c83a76b0SSuyog Pawar * Vertical edge processing is done by shifting the CTB up by four pixels */
233*c83a76b0SSuyog Pawar pu1_src -= 4 * src_strd;
234*c83a76b0SSuyog Pawar
235*c83a76b0SSuyog Pawar for(col = 0; col < ctb_size / 8; col++)
236*c83a76b0SSuyog Pawar {
237*c83a76b0SSuyog Pawar WORD32 shift = 0;
238*c83a76b0SSuyog Pawar
239*c83a76b0SSuyog Pawar /* downshift vert_bs by ctb_size/2 for each column
240*c83a76b0SSuyog Pawar * shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
241*c83a76b0SSuyog Pawar * which will reduce to the following assuming ctb size is one of 16, 32 and 64
242*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
243*c83a76b0SSuyog Pawar */
244*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
245*c83a76b0SSuyog Pawar shift = (col & 1) << (log2_ctb_size - 1);
246*c83a76b0SSuyog Pawar
247*c83a76b0SSuyog Pawar /* BS for the column - Last row is excluded and the top row is included*/
248*c83a76b0SSuyog Pawar u4_bs = (pu4_vert_bs[0] >> shift) << 2;
249*c83a76b0SSuyog Pawar
250*c83a76b0SSuyog Pawar if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
251*c83a76b0SSuyog Pawar {
252*c83a76b0SSuyog Pawar /* Picking the last BS of the previous CTB corresponding to the same column */
253*c83a76b0SSuyog Pawar UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
254*c83a76b0SSuyog Pawar UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
255*c83a76b0SSuyog Pawar u4_bs |= u4_top_bs & 3;
256*c83a76b0SSuyog Pawar }
257*c83a76b0SSuyog Pawar
258*c83a76b0SSuyog Pawar for(row = 0; row < ctb_size / 4;)
259*c83a76b0SSuyog Pawar {
260*c83a76b0SSuyog Pawar WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
261*c83a76b0SSuyog Pawar WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
262*c83a76b0SSuyog Pawar
263*c83a76b0SSuyog Pawar /* Trailing zeros are computed and the corresponding rows are not processed */
264*c83a76b0SSuyog Pawar bs_tz = CTZ(u4_bs) >> 1;
265*c83a76b0SSuyog Pawar if(0 != bs_tz)
266*c83a76b0SSuyog Pawar {
267*c83a76b0SSuyog Pawar u4_bs = u4_bs >> (bs_tz << 1);
268*c83a76b0SSuyog Pawar if((row + bs_tz) >= (ctb_size / 4))
269*c83a76b0SSuyog Pawar pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
270*c83a76b0SSuyog Pawar else
271*c83a76b0SSuyog Pawar pu1_src += 4 * bs_tz * src_strd;
272*c83a76b0SSuyog Pawar
273*c83a76b0SSuyog Pawar row += bs_tz;
274*c83a76b0SSuyog Pawar continue;
275*c83a76b0SSuyog Pawar }
276*c83a76b0SSuyog Pawar
277*c83a76b0SSuyog Pawar if(0 == row)
278*c83a76b0SSuyog Pawar {
279*c83a76b0SSuyog Pawar i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
280*c83a76b0SSuyog Pawar i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
281*c83a76b0SSuyog Pawar
282*c83a76b0SSuyog Pawar if(0 == col)
283*c83a76b0SSuyog Pawar {
284*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[0] ?
285*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
286*c83a76b0SSuyog Pawar pu1_qp[-qp_strd - 1];
287*c83a76b0SSuyog Pawar }
288*c83a76b0SSuyog Pawar else
289*c83a76b0SSuyog Pawar {
290*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[1] ?
291*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd] :
292*c83a76b0SSuyog Pawar pu1_qp[col - 1 - qp_strd];
293*c83a76b0SSuyog Pawar }
294*c83a76b0SSuyog Pawar
295*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[1] ?
296*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd] :
297*c83a76b0SSuyog Pawar pu1_qp[col - qp_strd];
298*c83a76b0SSuyog Pawar }
299*c83a76b0SSuyog Pawar else
300*c83a76b0SSuyog Pawar {
301*c83a76b0SSuyog Pawar if(0 == col)
302*c83a76b0SSuyog Pawar {
303*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[2] ?
304*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8] :
305*c83a76b0SSuyog Pawar pu1_qp[((row - 1) >> 1) * qp_strd - 1];
306*c83a76b0SSuyog Pawar }
307*c83a76b0SSuyog Pawar else
308*c83a76b0SSuyog Pawar {
309*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[3] ?
310*c83a76b0SSuyog Pawar pu1_qp[0] :
311*c83a76b0SSuyog Pawar pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
312*c83a76b0SSuyog Pawar }
313*c83a76b0SSuyog Pawar
314*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[3] ?
315*c83a76b0SSuyog Pawar pu1_qp[0] :
316*c83a76b0SSuyog Pawar pu1_qp[((row - 1) >> 1) * qp_strd + col];
317*c83a76b0SSuyog Pawar }
318*c83a76b0SSuyog Pawar
319*c83a76b0SSuyog Pawar filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
320*c83a76b0SSuyog Pawar filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
321*c83a76b0SSuyog Pawar /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
322*c83a76b0SSuyog Pawar filter_p = !filter_p;
323*c83a76b0SSuyog Pawar filter_q = !filter_q;
324*c83a76b0SSuyog Pawar
325*c83a76b0SSuyog Pawar if(filter_p || filter_q)
326*c83a76b0SSuyog Pawar {
327*c83a76b0SSuyog Pawar DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
328*c83a76b0SSuyog Pawar u4_bs & 3, qp_p, qp_q,
329*c83a76b0SSuyog Pawar ps_slice_hdr->i1_beta_offset_div2,
330*c83a76b0SSuyog Pawar ps_slice_hdr->i1_tc_offset_div2,
331*c83a76b0SSuyog Pawar filter_p, filter_q);
332*c83a76b0SSuyog Pawar ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
333*c83a76b0SSuyog Pawar u4_bs & 3, qp_p, qp_q,
334*c83a76b0SSuyog Pawar i1_beta_offset_div2,
335*c83a76b0SSuyog Pawar i1_tc_offset_div2,
336*c83a76b0SSuyog Pawar filter_p, filter_q);
337*c83a76b0SSuyog Pawar }
338*c83a76b0SSuyog Pawar
339*c83a76b0SSuyog Pawar pu1_src += 4 * src_strd;
340*c83a76b0SSuyog Pawar u4_bs = u4_bs >> 2;
341*c83a76b0SSuyog Pawar row++;
342*c83a76b0SSuyog Pawar }
343*c83a76b0SSuyog Pawar
344*c83a76b0SSuyog Pawar if((64 == ctb_size) ||
345*c83a76b0SSuyog Pawar ((32 == ctb_size) && (col & 1)))
346*c83a76b0SSuyog Pawar {
347*c83a76b0SSuyog Pawar pu4_vert_bs++;
348*c83a76b0SSuyog Pawar }
349*c83a76b0SSuyog Pawar pu1_src -= (src_strd << log2_ctb_size);
350*c83a76b0SSuyog Pawar pu1_src += 8;
351*c83a76b0SSuyog Pawar }
352*c83a76b0SSuyog Pawar pu4_vert_bs = pu4_ctb_vert_bs;
353*c83a76b0SSuyog Pawar }
354*c83a76b0SSuyog Pawar
355*c83a76b0SSuyog Pawar
356*c83a76b0SSuyog Pawar /* Luma Horizontal Edge */
357*c83a76b0SSuyog Pawar
358*c83a76b0SSuyog Pawar if(0 == i4_is_last_ctb_y)
359*c83a76b0SSuyog Pawar {
360*c83a76b0SSuyog Pawar
361*c83a76b0SSuyog Pawar /* Left CTB's slice header */
362*c83a76b0SSuyog Pawar slice_header_t *ps_slice_hdr_left;
363*c83a76b0SSuyog Pawar {
364*c83a76b0SSuyog Pawar WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
365*c83a76b0SSuyog Pawar if(i4_is_last_ctb_x)
366*c83a76b0SSuyog Pawar cur_ctb_indx += 1;
367*c83a76b0SSuyog Pawar ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
368*c83a76b0SSuyog Pawar }
369*c83a76b0SSuyog Pawar pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
370*c83a76b0SSuyog Pawar pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
371*c83a76b0SSuyog Pawar
372*c83a76b0SSuyog Pawar /** Deblocking is done on a shifted CTB -
373*c83a76b0SSuyog Pawar * Horizontal edge processing is done by shifting the CTB left by four pixels */
374*c83a76b0SSuyog Pawar pu1_src -= 4;
375*c83a76b0SSuyog Pawar for(row = 0; row < ctb_size / 8; row++)
376*c83a76b0SSuyog Pawar {
377*c83a76b0SSuyog Pawar WORD32 shift = 0;
378*c83a76b0SSuyog Pawar
379*c83a76b0SSuyog Pawar /* downshift horz_bs by ctb_size/2 for each row
380*c83a76b0SSuyog Pawar * shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
381*c83a76b0SSuyog Pawar * which will reduce to the following assuming ctb size is one of 16, 32 and 64
382*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
383*c83a76b0SSuyog Pawar */
384*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
385*c83a76b0SSuyog Pawar shift = (row & 1) << (log2_ctb_size - 1);
386*c83a76b0SSuyog Pawar
387*c83a76b0SSuyog Pawar /* BS for the row - Last column is excluded and the left column is included*/
388*c83a76b0SSuyog Pawar u4_bs = (pu4_horz_bs[0] >> shift) << 2;
389*c83a76b0SSuyog Pawar
390*c83a76b0SSuyog Pawar if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
391*c83a76b0SSuyog Pawar {
392*c83a76b0SSuyog Pawar /** Picking the last BS of the previous CTB corresponding to the same row
393*c83a76b0SSuyog Pawar * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
394*c83a76b0SSuyog Pawar */
395*c83a76b0SSuyog Pawar UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
396*c83a76b0SSuyog Pawar UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
397*c83a76b0SSuyog Pawar u4_bs |= u4_left_bs & 3;
398*c83a76b0SSuyog Pawar }
399*c83a76b0SSuyog Pawar
400*c83a76b0SSuyog Pawar for(col = 0; col < ctb_size / 4;)
401*c83a76b0SSuyog Pawar {
402*c83a76b0SSuyog Pawar WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
403*c83a76b0SSuyog Pawar WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
404*c83a76b0SSuyog Pawar
405*c83a76b0SSuyog Pawar bs_tz = CTZ(u4_bs) >> 1;
406*c83a76b0SSuyog Pawar if(0 != bs_tz)
407*c83a76b0SSuyog Pawar {
408*c83a76b0SSuyog Pawar u4_bs = u4_bs >> (bs_tz << 1);
409*c83a76b0SSuyog Pawar
410*c83a76b0SSuyog Pawar if((col + bs_tz) >= (ctb_size / 4))
411*c83a76b0SSuyog Pawar pu1_src += 4 * (ctb_size / 4 - col);
412*c83a76b0SSuyog Pawar else
413*c83a76b0SSuyog Pawar pu1_src += 4 * bs_tz;
414*c83a76b0SSuyog Pawar
415*c83a76b0SSuyog Pawar col += bs_tz;
416*c83a76b0SSuyog Pawar continue;
417*c83a76b0SSuyog Pawar }
418*c83a76b0SSuyog Pawar
419*c83a76b0SSuyog Pawar if(0 == col)
420*c83a76b0SSuyog Pawar {
421*c83a76b0SSuyog Pawar i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
422*c83a76b0SSuyog Pawar i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
423*c83a76b0SSuyog Pawar
424*c83a76b0SSuyog Pawar if(0 == row)
425*c83a76b0SSuyog Pawar {
426*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[0] ?
427*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
428*c83a76b0SSuyog Pawar pu1_qp[-qp_strd - 1];
429*c83a76b0SSuyog Pawar }
430*c83a76b0SSuyog Pawar else
431*c83a76b0SSuyog Pawar {
432*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[2] ?
433*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8] :
434*c83a76b0SSuyog Pawar pu1_qp[(row - 1) * qp_strd - 1];
435*c83a76b0SSuyog Pawar }
436*c83a76b0SSuyog Pawar
437*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[2] ?
438*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8] :
439*c83a76b0SSuyog Pawar pu1_qp[row * qp_strd - 1];
440*c83a76b0SSuyog Pawar }
441*c83a76b0SSuyog Pawar else
442*c83a76b0SSuyog Pawar {
443*c83a76b0SSuyog Pawar if(0 == row)
444*c83a76b0SSuyog Pawar {
445*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[1] ?
446*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd] :
447*c83a76b0SSuyog Pawar pu1_qp[((col - 1) >> 1) - qp_strd];
448*c83a76b0SSuyog Pawar }
449*c83a76b0SSuyog Pawar else
450*c83a76b0SSuyog Pawar {
451*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[3] ?
452*c83a76b0SSuyog Pawar pu1_qp[0] :
453*c83a76b0SSuyog Pawar pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
454*c83a76b0SSuyog Pawar }
455*c83a76b0SSuyog Pawar
456*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[3] ?
457*c83a76b0SSuyog Pawar pu1_qp[0] :
458*c83a76b0SSuyog Pawar pu1_qp[((col - 1) >> 1) + row * qp_strd];
459*c83a76b0SSuyog Pawar }
460*c83a76b0SSuyog Pawar
461*c83a76b0SSuyog Pawar filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
462*c83a76b0SSuyog Pawar filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
463*c83a76b0SSuyog Pawar /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
464*c83a76b0SSuyog Pawar filter_p = !filter_p;
465*c83a76b0SSuyog Pawar filter_q = !filter_q;
466*c83a76b0SSuyog Pawar
467*c83a76b0SSuyog Pawar if(filter_p || filter_q)
468*c83a76b0SSuyog Pawar {
469*c83a76b0SSuyog Pawar DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
470*c83a76b0SSuyog Pawar u4_bs & 3, qp_p, qp_q,
471*c83a76b0SSuyog Pawar ps_slice_hdr->i1_beta_offset_div2,
472*c83a76b0SSuyog Pawar ps_slice_hdr->i1_tc_offset_div2,
473*c83a76b0SSuyog Pawar filter_p, filter_q);
474*c83a76b0SSuyog Pawar ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
475*c83a76b0SSuyog Pawar u4_bs & 3, qp_p, qp_q,
476*c83a76b0SSuyog Pawar i1_beta_offset_div2,
477*c83a76b0SSuyog Pawar i1_tc_offset_div2, filter_p, filter_q);
478*c83a76b0SSuyog Pawar }
479*c83a76b0SSuyog Pawar
480*c83a76b0SSuyog Pawar pu1_src += 4;
481*c83a76b0SSuyog Pawar u4_bs = u4_bs >> 2;
482*c83a76b0SSuyog Pawar col++;
483*c83a76b0SSuyog Pawar }
484*c83a76b0SSuyog Pawar
485*c83a76b0SSuyog Pawar if((64 == ctb_size) ||
486*c83a76b0SSuyog Pawar ((32 == ctb_size) && (row & 1)))
487*c83a76b0SSuyog Pawar {
488*c83a76b0SSuyog Pawar pu4_horz_bs++;
489*c83a76b0SSuyog Pawar }
490*c83a76b0SSuyog Pawar pu1_src -= ctb_size;
491*c83a76b0SSuyog Pawar pu1_src += (src_strd << 3);
492*c83a76b0SSuyog Pawar }
493*c83a76b0SSuyog Pawar pu4_horz_bs = pu4_ctb_horz_bs;
494*c83a76b0SSuyog Pawar }
495*c83a76b0SSuyog Pawar
496*c83a76b0SSuyog Pawar
497*c83a76b0SSuyog Pawar /* Chroma Vertical Edge */
498*c83a76b0SSuyog Pawar
499*c83a76b0SSuyog Pawar if(0 == i4_is_last_ctb_x)
500*c83a76b0SSuyog Pawar {
501*c83a76b0SSuyog Pawar
502*c83a76b0SSuyog Pawar /* Top CTB's slice header */
503*c83a76b0SSuyog Pawar slice_header_t *ps_slice_hdr_top;
504*c83a76b0SSuyog Pawar {
505*c83a76b0SSuyog Pawar WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
506*c83a76b0SSuyog Pawar if(i4_is_last_ctb_y)
507*c83a76b0SSuyog Pawar cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
508*c83a76b0SSuyog Pawar ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
509*c83a76b0SSuyog Pawar }
510*c83a76b0SSuyog Pawar
511*c83a76b0SSuyog Pawar pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
512*c83a76b0SSuyog Pawar pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
513*c83a76b0SSuyog Pawar
514*c83a76b0SSuyog Pawar /** Deblocking is done on a shifted CTB -
515*c83a76b0SSuyog Pawar * Vertical edge processing is done by shifting the CTB up by four pixels */
516*c83a76b0SSuyog Pawar pu1_src -= 4 * src_strd;
517*c83a76b0SSuyog Pawar
518*c83a76b0SSuyog Pawar for(col = 0; col < ctb_size / 16; col++)
519*c83a76b0SSuyog Pawar {
520*c83a76b0SSuyog Pawar
521*c83a76b0SSuyog Pawar /* BS for the column - Last row is excluded and the top row is included*/
522*c83a76b0SSuyog Pawar u4_bs = pu4_vert_bs[0] << 2;
523*c83a76b0SSuyog Pawar
524*c83a76b0SSuyog Pawar if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
525*c83a76b0SSuyog Pawar {
526*c83a76b0SSuyog Pawar /* Picking the last BS of the previous CTB corresponding to the same column */
527*c83a76b0SSuyog Pawar UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
528*c83a76b0SSuyog Pawar UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
529*c83a76b0SSuyog Pawar u4_bs |= u4_top_bs & 3;
530*c83a76b0SSuyog Pawar }
531*c83a76b0SSuyog Pawar
532*c83a76b0SSuyog Pawar /* Every alternate boundary strength value is used for chroma */
533*c83a76b0SSuyog Pawar u4_bs &= 0x22222222;
534*c83a76b0SSuyog Pawar
535*c83a76b0SSuyog Pawar for(row = 0; row < ctb_size / 8;)
536*c83a76b0SSuyog Pawar {
537*c83a76b0SSuyog Pawar WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
538*c83a76b0SSuyog Pawar
539*c83a76b0SSuyog Pawar bs_tz = CTZ(u4_bs) >> 2;
540*c83a76b0SSuyog Pawar if(0 != bs_tz)
541*c83a76b0SSuyog Pawar {
542*c83a76b0SSuyog Pawar if((row + bs_tz) >= (ctb_size / 8))
543*c83a76b0SSuyog Pawar pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
544*c83a76b0SSuyog Pawar else
545*c83a76b0SSuyog Pawar pu1_src += 4 * bs_tz * src_strd;
546*c83a76b0SSuyog Pawar row += bs_tz;
547*c83a76b0SSuyog Pawar u4_bs = u4_bs >> (bs_tz << 2);
548*c83a76b0SSuyog Pawar continue;
549*c83a76b0SSuyog Pawar }
550*c83a76b0SSuyog Pawar
551*c83a76b0SSuyog Pawar if(0 == row)
552*c83a76b0SSuyog Pawar {
553*c83a76b0SSuyog Pawar i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
554*c83a76b0SSuyog Pawar
555*c83a76b0SSuyog Pawar if(0 == col)
556*c83a76b0SSuyog Pawar {
557*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[0] ?
558*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
559*c83a76b0SSuyog Pawar pu1_qp[-qp_strd - 1];
560*c83a76b0SSuyog Pawar }
561*c83a76b0SSuyog Pawar else
562*c83a76b0SSuyog Pawar {
563*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[1] ?
564*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd] :
565*c83a76b0SSuyog Pawar pu1_qp[2 * col - 1 - qp_strd];
566*c83a76b0SSuyog Pawar }
567*c83a76b0SSuyog Pawar
568*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[1] ?
569*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd] :
570*c83a76b0SSuyog Pawar pu1_qp[2 * col - qp_strd];
571*c83a76b0SSuyog Pawar }
572*c83a76b0SSuyog Pawar else
573*c83a76b0SSuyog Pawar {
574*c83a76b0SSuyog Pawar if(0 == col)
575*c83a76b0SSuyog Pawar {
576*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[2] ?
577*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8] :
578*c83a76b0SSuyog Pawar pu1_qp[(row - 1) * qp_strd - 1];
579*c83a76b0SSuyog Pawar }
580*c83a76b0SSuyog Pawar else
581*c83a76b0SSuyog Pawar {
582*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[3] ?
583*c83a76b0SSuyog Pawar pu1_qp[0] :
584*c83a76b0SSuyog Pawar pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
585*c83a76b0SSuyog Pawar }
586*c83a76b0SSuyog Pawar
587*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[3] ?
588*c83a76b0SSuyog Pawar pu1_qp[0] :
589*c83a76b0SSuyog Pawar pu1_qp[(row - 1) * qp_strd + 2 * col];
590*c83a76b0SSuyog Pawar }
591*c83a76b0SSuyog Pawar
592*c83a76b0SSuyog Pawar filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
593*c83a76b0SSuyog Pawar filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
594*c83a76b0SSuyog Pawar /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
595*c83a76b0SSuyog Pawar filter_p = !filter_p;
596*c83a76b0SSuyog Pawar filter_q = !filter_q;
597*c83a76b0SSuyog Pawar
598*c83a76b0SSuyog Pawar if(filter_p || filter_q)
599*c83a76b0SSuyog Pawar {
600*c83a76b0SSuyog Pawar ASSERT(1 == ((u4_bs & 3) >> 1));
601*c83a76b0SSuyog Pawar DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
602*c83a76b0SSuyog Pawar u4_bs & 3, qp_p, qp_q,
603*c83a76b0SSuyog Pawar ps_pps->i1_pic_cb_qp_offset,
604*c83a76b0SSuyog Pawar ps_pps->i1_pic_cr_qp_offset,
605*c83a76b0SSuyog Pawar ps_slice_hdr->i1_tc_offset_div2,
606*c83a76b0SSuyog Pawar filter_p, filter_q);
607*c83a76b0SSuyog Pawar if(chroma_yuv420sp_vu)
608*c83a76b0SSuyog Pawar {
609*c83a76b0SSuyog Pawar ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
610*c83a76b0SSuyog Pawar src_strd,
611*c83a76b0SSuyog Pawar qp_q,
612*c83a76b0SSuyog Pawar qp_p,
613*c83a76b0SSuyog Pawar ps_pps->i1_pic_cr_qp_offset,
614*c83a76b0SSuyog Pawar ps_pps->i1_pic_cb_qp_offset,
615*c83a76b0SSuyog Pawar i1_tc_offset_div2,
616*c83a76b0SSuyog Pawar filter_q,
617*c83a76b0SSuyog Pawar filter_p);
618*c83a76b0SSuyog Pawar }
619*c83a76b0SSuyog Pawar else
620*c83a76b0SSuyog Pawar {
621*c83a76b0SSuyog Pawar ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
622*c83a76b0SSuyog Pawar src_strd,
623*c83a76b0SSuyog Pawar qp_p,
624*c83a76b0SSuyog Pawar qp_q,
625*c83a76b0SSuyog Pawar ps_pps->i1_pic_cb_qp_offset,
626*c83a76b0SSuyog Pawar ps_pps->i1_pic_cr_qp_offset,
627*c83a76b0SSuyog Pawar i1_tc_offset_div2,
628*c83a76b0SSuyog Pawar filter_p,
629*c83a76b0SSuyog Pawar filter_q);
630*c83a76b0SSuyog Pawar }
631*c83a76b0SSuyog Pawar }
632*c83a76b0SSuyog Pawar
633*c83a76b0SSuyog Pawar pu1_src += 4 * src_strd;
634*c83a76b0SSuyog Pawar u4_bs = u4_bs >> 4;
635*c83a76b0SSuyog Pawar row++;
636*c83a76b0SSuyog Pawar }
637*c83a76b0SSuyog Pawar
638*c83a76b0SSuyog Pawar pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
639*c83a76b0SSuyog Pawar pu1_src -= ((src_strd / 2) << log2_ctb_size);
640*c83a76b0SSuyog Pawar pu1_src += 16;
641*c83a76b0SSuyog Pawar }
642*c83a76b0SSuyog Pawar }
643*c83a76b0SSuyog Pawar
644*c83a76b0SSuyog Pawar /* Chroma Horizontal Edge */
645*c83a76b0SSuyog Pawar
646*c83a76b0SSuyog Pawar if(0 == i4_is_last_ctb_y)
647*c83a76b0SSuyog Pawar {
648*c83a76b0SSuyog Pawar
649*c83a76b0SSuyog Pawar /* Left CTB's slice header */
650*c83a76b0SSuyog Pawar slice_header_t *ps_slice_hdr_left;
651*c83a76b0SSuyog Pawar {
652*c83a76b0SSuyog Pawar WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
653*c83a76b0SSuyog Pawar if(i4_is_last_ctb_x)
654*c83a76b0SSuyog Pawar cur_ctb_indx += 1;
655*c83a76b0SSuyog Pawar ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
656*c83a76b0SSuyog Pawar }
657*c83a76b0SSuyog Pawar
658*c83a76b0SSuyog Pawar pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
659*c83a76b0SSuyog Pawar pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
660*c83a76b0SSuyog Pawar
661*c83a76b0SSuyog Pawar /** Deblocking is done on a shifted CTB -
662*c83a76b0SSuyog Pawar * Horizontal edge processing is done by shifting the CTB left by four pixels (8 here because UV are interleaved) */
663*c83a76b0SSuyog Pawar pu1_src -= 8;
664*c83a76b0SSuyog Pawar for(row = 0; row < ctb_size / 16; row++)
665*c83a76b0SSuyog Pawar {
666*c83a76b0SSuyog Pawar /* BS for the row - Last column is excluded and the left column is included*/
667*c83a76b0SSuyog Pawar u4_bs = pu4_horz_bs[0] << 2;
668*c83a76b0SSuyog Pawar
669*c83a76b0SSuyog Pawar if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
670*c83a76b0SSuyog Pawar {
671*c83a76b0SSuyog Pawar /** Picking the last BS of the previous CTB corresponding to the same row
672*c83a76b0SSuyog Pawar * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
673*c83a76b0SSuyog Pawar */
674*c83a76b0SSuyog Pawar UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
675*c83a76b0SSuyog Pawar UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
676*c83a76b0SSuyog Pawar u4_bs |= u4_left_bs & 3;
677*c83a76b0SSuyog Pawar }
678*c83a76b0SSuyog Pawar
679*c83a76b0SSuyog Pawar /* Every alternate boundary strength value is used for chroma */
680*c83a76b0SSuyog Pawar u4_bs &= 0x22222222;
681*c83a76b0SSuyog Pawar
682*c83a76b0SSuyog Pawar for(col = 0; col < ctb_size / 8;)
683*c83a76b0SSuyog Pawar {
684*c83a76b0SSuyog Pawar WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
685*c83a76b0SSuyog Pawar
686*c83a76b0SSuyog Pawar bs_tz = CTZ(u4_bs) >> 2;
687*c83a76b0SSuyog Pawar if(0 != bs_tz)
688*c83a76b0SSuyog Pawar {
689*c83a76b0SSuyog Pawar u4_bs = u4_bs >> (bs_tz << 2);
690*c83a76b0SSuyog Pawar
691*c83a76b0SSuyog Pawar if((col + bs_tz) >= (ctb_size / 8))
692*c83a76b0SSuyog Pawar pu1_src += 8 * (ctb_size / 8 - col);
693*c83a76b0SSuyog Pawar else
694*c83a76b0SSuyog Pawar pu1_src += 8 * bs_tz;
695*c83a76b0SSuyog Pawar
696*c83a76b0SSuyog Pawar col += bs_tz;
697*c83a76b0SSuyog Pawar continue;
698*c83a76b0SSuyog Pawar }
699*c83a76b0SSuyog Pawar
700*c83a76b0SSuyog Pawar if(0 == col)
701*c83a76b0SSuyog Pawar {
702*c83a76b0SSuyog Pawar i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
703*c83a76b0SSuyog Pawar
704*c83a76b0SSuyog Pawar if(0 == row)
705*c83a76b0SSuyog Pawar {
706*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[0] ?
707*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
708*c83a76b0SSuyog Pawar pu1_qp[-qp_strd - 1];
709*c83a76b0SSuyog Pawar }
710*c83a76b0SSuyog Pawar else
711*c83a76b0SSuyog Pawar {
712*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[2] ?
713*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8] :
714*c83a76b0SSuyog Pawar pu1_qp[(2 * row - 1) * qp_strd - 1];
715*c83a76b0SSuyog Pawar }
716*c83a76b0SSuyog Pawar
717*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[2] ?
718*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8] :
719*c83a76b0SSuyog Pawar pu1_qp[(2 * row) * qp_strd - 1];
720*c83a76b0SSuyog Pawar }
721*c83a76b0SSuyog Pawar else
722*c83a76b0SSuyog Pawar {
723*c83a76b0SSuyog Pawar if(0 == row)
724*c83a76b0SSuyog Pawar {
725*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[1] ?
726*c83a76b0SSuyog Pawar pu1_qp[-ctb_size / 8 * qp_strd] :
727*c83a76b0SSuyog Pawar pu1_qp[col - 1 - qp_strd];
728*c83a76b0SSuyog Pawar }
729*c83a76b0SSuyog Pawar else
730*c83a76b0SSuyog Pawar {
731*c83a76b0SSuyog Pawar qp_p = u4_qp_const_in_ctb[3] ?
732*c83a76b0SSuyog Pawar pu1_qp[0] :
733*c83a76b0SSuyog Pawar pu1_qp[(col - 1) + (2 * row - 1) * qp_strd];
734*c83a76b0SSuyog Pawar }
735*c83a76b0SSuyog Pawar
736*c83a76b0SSuyog Pawar qp_q = u4_qp_const_in_ctb[3] ?
737*c83a76b0SSuyog Pawar pu1_qp[0] :
738*c83a76b0SSuyog Pawar pu1_qp[(col - 1) + 2 * row * qp_strd];
739*c83a76b0SSuyog Pawar }
740*c83a76b0SSuyog Pawar
741*c83a76b0SSuyog Pawar filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
742*c83a76b0SSuyog Pawar filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
743*c83a76b0SSuyog Pawar /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
744*c83a76b0SSuyog Pawar filter_p = !filter_p;
745*c83a76b0SSuyog Pawar filter_q = !filter_q;
746*c83a76b0SSuyog Pawar
747*c83a76b0SSuyog Pawar if(filter_p || filter_q)
748*c83a76b0SSuyog Pawar {
749*c83a76b0SSuyog Pawar ASSERT(1 == ((u4_bs & 3) >> 1));
750*c83a76b0SSuyog Pawar DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
751*c83a76b0SSuyog Pawar u4_bs & 3, qp_p, qp_q,
752*c83a76b0SSuyog Pawar ps_pps->i1_pic_cb_qp_offset,
753*c83a76b0SSuyog Pawar ps_pps->i1_pic_cr_qp_offset,
754*c83a76b0SSuyog Pawar ps_slice_hdr->i1_tc_offset_div2,
755*c83a76b0SSuyog Pawar filter_p, filter_q);
756*c83a76b0SSuyog Pawar if(chroma_yuv420sp_vu)
757*c83a76b0SSuyog Pawar {
758*c83a76b0SSuyog Pawar ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
759*c83a76b0SSuyog Pawar src_strd,
760*c83a76b0SSuyog Pawar qp_q,
761*c83a76b0SSuyog Pawar qp_p,
762*c83a76b0SSuyog Pawar ps_pps->i1_pic_cr_qp_offset,
763*c83a76b0SSuyog Pawar ps_pps->i1_pic_cb_qp_offset,
764*c83a76b0SSuyog Pawar i1_tc_offset_div2,
765*c83a76b0SSuyog Pawar filter_q,
766*c83a76b0SSuyog Pawar filter_p);
767*c83a76b0SSuyog Pawar }
768*c83a76b0SSuyog Pawar else
769*c83a76b0SSuyog Pawar {
770*c83a76b0SSuyog Pawar ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
771*c83a76b0SSuyog Pawar src_strd,
772*c83a76b0SSuyog Pawar qp_p,
773*c83a76b0SSuyog Pawar qp_q,
774*c83a76b0SSuyog Pawar ps_pps->i1_pic_cb_qp_offset,
775*c83a76b0SSuyog Pawar ps_pps->i1_pic_cr_qp_offset,
776*c83a76b0SSuyog Pawar i1_tc_offset_div2,
777*c83a76b0SSuyog Pawar filter_p,
778*c83a76b0SSuyog Pawar filter_q);
779*c83a76b0SSuyog Pawar }
780*c83a76b0SSuyog Pawar }
781*c83a76b0SSuyog Pawar
782*c83a76b0SSuyog Pawar pu1_src += 8;
783*c83a76b0SSuyog Pawar u4_bs = u4_bs >> 4;
784*c83a76b0SSuyog Pawar col++;
785*c83a76b0SSuyog Pawar }
786*c83a76b0SSuyog Pawar
787*c83a76b0SSuyog Pawar pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
788*c83a76b0SSuyog Pawar pu1_src -= ctb_size;
789*c83a76b0SSuyog Pawar pu1_src += 8 * src_strd;
790*c83a76b0SSuyog Pawar
791*c83a76b0SSuyog Pawar }
792*c83a76b0SSuyog Pawar }
793*c83a76b0SSuyog Pawar }
794