xref: /aosp_15_r20/external/libhevc/decoder/ihevcd_deblk.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar *******************************************************************************
20*c83a76b0SSuyog Pawar * @file
21*c83a76b0SSuyog Pawar *  ihevcd_deblk.c
22*c83a76b0SSuyog Pawar *
23*c83a76b0SSuyog Pawar * @brief
24*c83a76b0SSuyog Pawar *  Contains definition for the ctb level deblk function
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar * @author
27*c83a76b0SSuyog Pawar *  Srinivas T
28*c83a76b0SSuyog Pawar *
29*c83a76b0SSuyog Pawar * @par List of Functions:
30*c83a76b0SSuyog Pawar *   - ihevcd_deblk_ctb()
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * @remarks
33*c83a76b0SSuyog Pawar *  None
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar *******************************************************************************
36*c83a76b0SSuyog Pawar */
37*c83a76b0SSuyog Pawar 
38*c83a76b0SSuyog Pawar #include <stdio.h>
39*c83a76b0SSuyog Pawar #include <stddef.h>
40*c83a76b0SSuyog Pawar #include <stdlib.h>
41*c83a76b0SSuyog Pawar #include <string.h>
42*c83a76b0SSuyog Pawar #include <assert.h>
43*c83a76b0SSuyog Pawar 
44*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
45*c83a76b0SSuyog Pawar #include "iv.h"
46*c83a76b0SSuyog Pawar #include "ivd.h"
47*c83a76b0SSuyog Pawar #include "ihevcd_cxa.h"
48*c83a76b0SSuyog Pawar #include "ithread.h"
49*c83a76b0SSuyog Pawar 
50*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
51*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
52*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
53*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
54*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
55*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
56*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
57*c83a76b0SSuyog Pawar 
58*c83a76b0SSuyog Pawar #include "ihevc_error.h"
59*c83a76b0SSuyog Pawar #include "ihevc_common_tables.h"
60*c83a76b0SSuyog Pawar 
61*c83a76b0SSuyog Pawar #include "ihevcd_trace.h"
62*c83a76b0SSuyog Pawar #include "ihevcd_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevcd_function_selector.h"
64*c83a76b0SSuyog Pawar #include "ihevcd_structs.h"
65*c83a76b0SSuyog Pawar #include "ihevcd_error.h"
66*c83a76b0SSuyog Pawar #include "ihevcd_nal.h"
67*c83a76b0SSuyog Pawar #include "ihevcd_bitstream.h"
68*c83a76b0SSuyog Pawar #include "ihevcd_job_queue.h"
69*c83a76b0SSuyog Pawar #include "ihevcd_utils.h"
70*c83a76b0SSuyog Pawar #include "ihevcd_debug.h"
71*c83a76b0SSuyog Pawar 
72*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
73*c83a76b0SSuyog Pawar #include "ihevc_deblk_tables.h"
74*c83a76b0SSuyog Pawar #include "ihevcd_profile.h"
75*c83a76b0SSuyog Pawar /**
76*c83a76b0SSuyog Pawar *******************************************************************************
77*c83a76b0SSuyog Pawar *
78*c83a76b0SSuyog Pawar * @brief
79*c83a76b0SSuyog Pawar *     Deblock CTB level function.
80*c83a76b0SSuyog Pawar *
81*c83a76b0SSuyog Pawar * @par Description:
82*c83a76b0SSuyog Pawar *     For a given CTB, deblocking on both vertical and
83*c83a76b0SSuyog Pawar *     horizontal edges is done. Both the luma and chroma
84*c83a76b0SSuyog Pawar *     blocks are processed
85*c83a76b0SSuyog Pawar *
86*c83a76b0SSuyog Pawar * @param[in] ps_deblk
87*c83a76b0SSuyog Pawar *  Pointer to the deblock context
88*c83a76b0SSuyog Pawar *
89*c83a76b0SSuyog Pawar * @returns
90*c83a76b0SSuyog Pawar *
91*c83a76b0SSuyog Pawar * @remarks
92*c83a76b0SSuyog Pawar *  None
93*c83a76b0SSuyog Pawar *
94*c83a76b0SSuyog Pawar *******************************************************************************
95*c83a76b0SSuyog Pawar */
96*c83a76b0SSuyog Pawar 
ihevcd_deblk_ctb(deblk_ctxt_t * ps_deblk,WORD32 i4_is_last_ctb_x,WORD32 i4_is_last_ctb_y)97*c83a76b0SSuyog Pawar void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98*c83a76b0SSuyog Pawar                       WORD32 i4_is_last_ctb_x,
99*c83a76b0SSuyog Pawar                       WORD32 i4_is_last_ctb_y)
100*c83a76b0SSuyog Pawar {
101*c83a76b0SSuyog Pawar     WORD32 ctb_size;
102*c83a76b0SSuyog Pawar     WORD32 log2_ctb_size;
103*c83a76b0SSuyog Pawar     UWORD32 u4_bs;
104*c83a76b0SSuyog Pawar     WORD32 bs_tz; /*Leading zeros in boundary strength*/
105*c83a76b0SSuyog Pawar     WORD32 qp_p, qp_q;
106*c83a76b0SSuyog Pawar 
107*c83a76b0SSuyog Pawar     WORD32 filter_p, filter_q;
108*c83a76b0SSuyog Pawar 
109*c83a76b0SSuyog Pawar     UWORD8 *pu1_src;
110*c83a76b0SSuyog Pawar     WORD32 qp_strd;
111*c83a76b0SSuyog Pawar     UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112*c83a76b0SSuyog Pawar     UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113*c83a76b0SSuyog Pawar     WORD32 bs_strd;
114*c83a76b0SSuyog Pawar     WORD32 src_strd;
115*c83a76b0SSuyog Pawar     UWORD8 *pu1_qp;
116*c83a76b0SSuyog Pawar     UWORD16 *pu2_ctb_no_loop_filter_flag;
117*c83a76b0SSuyog Pawar     UWORD16 au2_ctb_no_loop_filter_flag[9];
118*c83a76b0SSuyog Pawar 
119*c83a76b0SSuyog Pawar     WORD32 col, row;
120*c83a76b0SSuyog Pawar 
121*c83a76b0SSuyog Pawar     /* Flag to indicate if QP is constant in CTB
122*c83a76b0SSuyog Pawar      * 0 - top_left, 1 - top, 2 - left, 3 - current */
123*c83a76b0SSuyog Pawar     UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124*c83a76b0SSuyog Pawar     WORD32 ctb_indx;
125*c83a76b0SSuyog Pawar     WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126*c83a76b0SSuyog Pawar     sps_t *ps_sps;
127*c83a76b0SSuyog Pawar     pps_t *ps_pps;
128*c83a76b0SSuyog Pawar     codec_t *ps_codec;
129*c83a76b0SSuyog Pawar     slice_header_t *ps_slice_hdr;
130*c83a76b0SSuyog Pawar 
131*c83a76b0SSuyog Pawar     PROFILE_DISABLE_DEBLK();
132*c83a76b0SSuyog Pawar 
133*c83a76b0SSuyog Pawar     ps_sps = ps_deblk->ps_sps;
134*c83a76b0SSuyog Pawar     ps_pps = ps_deblk->ps_pps;
135*c83a76b0SSuyog Pawar     ps_codec = ps_deblk->ps_codec;
136*c83a76b0SSuyog Pawar     ps_slice_hdr = ps_deblk->ps_slice_hdr;
137*c83a76b0SSuyog Pawar 
138*c83a76b0SSuyog Pawar     log2_ctb_size = ps_sps->i1_log2_ctb_size;
139*c83a76b0SSuyog Pawar     ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140*c83a76b0SSuyog Pawar 
141*c83a76b0SSuyog Pawar     /* strides are in units of number of bytes */
142*c83a76b0SSuyog Pawar     /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143*c83a76b0SSuyog Pawar     bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
144*c83a76b0SSuyog Pawar 
145*c83a76b0SSuyog Pawar     pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146*c83a76b0SSuyog Pawar                     (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147*c83a76b0SSuyog Pawar                     ps_deblk->i4_ctb_y * bs_strd);
148*c83a76b0SSuyog Pawar     pu4_ctb_vert_bs = pu4_vert_bs;
149*c83a76b0SSuyog Pawar 
150*c83a76b0SSuyog Pawar     pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151*c83a76b0SSuyog Pawar                     (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152*c83a76b0SSuyog Pawar                     ps_deblk->i4_ctb_y * bs_strd);
153*c83a76b0SSuyog Pawar     pu4_ctb_horz_bs = pu4_horz_bs;
154*c83a76b0SSuyog Pawar 
155*c83a76b0SSuyog Pawar     qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156*c83a76b0SSuyog Pawar     pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157*c83a76b0SSuyog Pawar 
158*c83a76b0SSuyog Pawar     pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159*c83a76b0SSuyog Pawar 
160*c83a76b0SSuyog Pawar     ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161*c83a76b0SSuyog Pawar     if(i4_is_last_ctb_y)
162*c83a76b0SSuyog Pawar     {
163*c83a76b0SSuyog Pawar         pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
164*c83a76b0SSuyog Pawar         pu4_ctb_vert_bs = pu4_vert_bs;
165*c83a76b0SSuyog Pawar         /* ctb_size/8 is the number of edges per CTB
166*c83a76b0SSuyog Pawar          * ctb_size/4 is the number of BS values needed per edge
167*c83a76b0SSuyog Pawar          * divided by 8 for the number of bytes
168*c83a76b0SSuyog Pawar          * 2 is the number of bits needed for each BS value */
169*c83a76b0SSuyog Pawar         memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170*c83a76b0SSuyog Pawar 
171*c83a76b0SSuyog Pawar         pu1_qp += (qp_strd << (log2_ctb_size - 3));
172*c83a76b0SSuyog Pawar         pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173*c83a76b0SSuyog Pawar         ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174*c83a76b0SSuyog Pawar     }
175*c83a76b0SSuyog Pawar 
176*c83a76b0SSuyog Pawar     if(i4_is_last_ctb_x)
177*c83a76b0SSuyog Pawar     {
178*c83a76b0SSuyog Pawar         pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179*c83a76b0SSuyog Pawar         pu4_ctb_horz_bs = pu4_horz_bs;
180*c83a76b0SSuyog Pawar         memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181*c83a76b0SSuyog Pawar 
182*c83a76b0SSuyog Pawar         pu1_qp += (ctb_size >> 3);
183*c83a76b0SSuyog Pawar 
184*c83a76b0SSuyog Pawar         for(row = 0; row < (ctb_size >> 3) + 1; row++)
185*c83a76b0SSuyog Pawar             au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186*c83a76b0SSuyog Pawar         pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187*c83a76b0SSuyog Pawar         ctb_indx += 1;
188*c83a76b0SSuyog Pawar     }
189*c83a76b0SSuyog Pawar 
190*c83a76b0SSuyog Pawar     u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191*c83a76b0SSuyog Pawar 
192*c83a76b0SSuyog Pawar     if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193*c83a76b0SSuyog Pawar     {
194*c83a76b0SSuyog Pawar         u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195*c83a76b0SSuyog Pawar     }
196*c83a76b0SSuyog Pawar 
197*c83a76b0SSuyog Pawar     if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198*c83a76b0SSuyog Pawar     {
199*c83a76b0SSuyog Pawar         u4_qp_const_in_ctb[0] =
200*c83a76b0SSuyog Pawar                         ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201*c83a76b0SSuyog Pawar                         (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202*c83a76b0SSuyog Pawar     }
203*c83a76b0SSuyog Pawar 
204*c83a76b0SSuyog Pawar 
205*c83a76b0SSuyog Pawar 
206*c83a76b0SSuyog Pawar     if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207*c83a76b0SSuyog Pawar     {
208*c83a76b0SSuyog Pawar         u4_qp_const_in_ctb[1] =
209*c83a76b0SSuyog Pawar                         ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210*c83a76b0SSuyog Pawar                         (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211*c83a76b0SSuyog Pawar     }
212*c83a76b0SSuyog Pawar 
213*c83a76b0SSuyog Pawar     src_strd = ps_codec->i4_strd;
214*c83a76b0SSuyog Pawar 
215*c83a76b0SSuyog Pawar     /* Luma Vertical Edge */
216*c83a76b0SSuyog Pawar 
217*c83a76b0SSuyog Pawar     if(0 == i4_is_last_ctb_x)
218*c83a76b0SSuyog Pawar     {
219*c83a76b0SSuyog Pawar         /* Top CTB's slice header */
220*c83a76b0SSuyog Pawar         slice_header_t *ps_slice_hdr_top;
221*c83a76b0SSuyog Pawar         {
222*c83a76b0SSuyog Pawar             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
223*c83a76b0SSuyog Pawar             if(i4_is_last_ctb_y)
224*c83a76b0SSuyog Pawar                 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
225*c83a76b0SSuyog Pawar             ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
226*c83a76b0SSuyog Pawar         }
227*c83a76b0SSuyog Pawar 
228*c83a76b0SSuyog Pawar         pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
229*c83a76b0SSuyog Pawar         pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
230*c83a76b0SSuyog Pawar 
231*c83a76b0SSuyog Pawar         /** Deblocking is done on a shifted CTB -
232*c83a76b0SSuyog Pawar          *  Vertical edge processing is done by shifting the CTB up by four pixels */
233*c83a76b0SSuyog Pawar         pu1_src -= 4 * src_strd;
234*c83a76b0SSuyog Pawar 
235*c83a76b0SSuyog Pawar         for(col = 0; col < ctb_size / 8; col++)
236*c83a76b0SSuyog Pawar         {
237*c83a76b0SSuyog Pawar             WORD32 shift = 0;
238*c83a76b0SSuyog Pawar 
239*c83a76b0SSuyog Pawar             /*  downshift vert_bs by ctb_size/2 for each column
240*c83a76b0SSuyog Pawar              *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
241*c83a76b0SSuyog Pawar              *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
242*c83a76b0SSuyog Pawar              *  and deblocking is done on 8x8 grid
243*c83a76b0SSuyog Pawar              */
244*c83a76b0SSuyog Pawar             if(6 != log2_ctb_size)
245*c83a76b0SSuyog Pawar                 shift = (col & 1) << (log2_ctb_size - 1);
246*c83a76b0SSuyog Pawar 
247*c83a76b0SSuyog Pawar             /* BS for the column - Last row is excluded and the top row is included*/
248*c83a76b0SSuyog Pawar             u4_bs = (pu4_vert_bs[0] >> shift) << 2;
249*c83a76b0SSuyog Pawar 
250*c83a76b0SSuyog Pawar             if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
251*c83a76b0SSuyog Pawar             {
252*c83a76b0SSuyog Pawar                 /* Picking the last BS of the previous CTB corresponding to the same column */
253*c83a76b0SSuyog Pawar                 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
254*c83a76b0SSuyog Pawar                 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
255*c83a76b0SSuyog Pawar                 u4_bs |= u4_top_bs & 3;
256*c83a76b0SSuyog Pawar             }
257*c83a76b0SSuyog Pawar 
258*c83a76b0SSuyog Pawar             for(row = 0; row < ctb_size / 4;)
259*c83a76b0SSuyog Pawar             {
260*c83a76b0SSuyog Pawar                 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
261*c83a76b0SSuyog Pawar                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
262*c83a76b0SSuyog Pawar 
263*c83a76b0SSuyog Pawar                 /* Trailing zeros are computed and the corresponding rows are not processed */
264*c83a76b0SSuyog Pawar                 bs_tz = CTZ(u4_bs) >> 1;
265*c83a76b0SSuyog Pawar                 if(0 != bs_tz)
266*c83a76b0SSuyog Pawar                 {
267*c83a76b0SSuyog Pawar                     u4_bs = u4_bs >> (bs_tz << 1);
268*c83a76b0SSuyog Pawar                     if((row + bs_tz) >= (ctb_size / 4))
269*c83a76b0SSuyog Pawar                         pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
270*c83a76b0SSuyog Pawar                     else
271*c83a76b0SSuyog Pawar                         pu1_src += 4 * bs_tz  * src_strd;
272*c83a76b0SSuyog Pawar 
273*c83a76b0SSuyog Pawar                     row += bs_tz;
274*c83a76b0SSuyog Pawar                     continue;
275*c83a76b0SSuyog Pawar                 }
276*c83a76b0SSuyog Pawar 
277*c83a76b0SSuyog Pawar                 if(0 == row)
278*c83a76b0SSuyog Pawar                 {
279*c83a76b0SSuyog Pawar                     i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
280*c83a76b0SSuyog Pawar                     i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
281*c83a76b0SSuyog Pawar 
282*c83a76b0SSuyog Pawar                     if(0 == col)
283*c83a76b0SSuyog Pawar                     {
284*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[0] ?
285*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
286*c83a76b0SSuyog Pawar                                         pu1_qp[-qp_strd - 1];
287*c83a76b0SSuyog Pawar                     }
288*c83a76b0SSuyog Pawar                     else
289*c83a76b0SSuyog Pawar                     {
290*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[1] ?
291*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd] :
292*c83a76b0SSuyog Pawar                                         pu1_qp[col - 1 - qp_strd];
293*c83a76b0SSuyog Pawar                     }
294*c83a76b0SSuyog Pawar 
295*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[1] ?
296*c83a76b0SSuyog Pawar                                     pu1_qp[-ctb_size / 8 * qp_strd] :
297*c83a76b0SSuyog Pawar                                     pu1_qp[col - qp_strd];
298*c83a76b0SSuyog Pawar                 }
299*c83a76b0SSuyog Pawar                 else
300*c83a76b0SSuyog Pawar                 {
301*c83a76b0SSuyog Pawar                     if(0 == col)
302*c83a76b0SSuyog Pawar                     {
303*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[2] ?
304*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8] :
305*c83a76b0SSuyog Pawar                                         pu1_qp[((row - 1) >> 1) * qp_strd - 1];
306*c83a76b0SSuyog Pawar                     }
307*c83a76b0SSuyog Pawar                     else
308*c83a76b0SSuyog Pawar                     {
309*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[3] ?
310*c83a76b0SSuyog Pawar                                         pu1_qp[0] :
311*c83a76b0SSuyog Pawar                                         pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
312*c83a76b0SSuyog Pawar                     }
313*c83a76b0SSuyog Pawar 
314*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[3] ?
315*c83a76b0SSuyog Pawar                                     pu1_qp[0] :
316*c83a76b0SSuyog Pawar                                     pu1_qp[((row - 1) >> 1) * qp_strd + col];
317*c83a76b0SSuyog Pawar                 }
318*c83a76b0SSuyog Pawar 
319*c83a76b0SSuyog Pawar                 filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
320*c83a76b0SSuyog Pawar                 filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
321*c83a76b0SSuyog Pawar                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
322*c83a76b0SSuyog Pawar                 filter_p = !filter_p;
323*c83a76b0SSuyog Pawar                 filter_q = !filter_q;
324*c83a76b0SSuyog Pawar 
325*c83a76b0SSuyog Pawar                 if(filter_p || filter_q)
326*c83a76b0SSuyog Pawar                 {
327*c83a76b0SSuyog Pawar                     DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
328*c83a76b0SSuyog Pawar                                          u4_bs & 3, qp_p, qp_q,
329*c83a76b0SSuyog Pawar                                          ps_slice_hdr->i1_beta_offset_div2,
330*c83a76b0SSuyog Pawar                                          ps_slice_hdr->i1_tc_offset_div2,
331*c83a76b0SSuyog Pawar                                          filter_p, filter_q);
332*c83a76b0SSuyog Pawar                     ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
333*c83a76b0SSuyog Pawar                                                                          u4_bs & 3, qp_p, qp_q,
334*c83a76b0SSuyog Pawar                                                                          i1_beta_offset_div2,
335*c83a76b0SSuyog Pawar                                                                          i1_tc_offset_div2,
336*c83a76b0SSuyog Pawar                                                                          filter_p, filter_q);
337*c83a76b0SSuyog Pawar                 }
338*c83a76b0SSuyog Pawar 
339*c83a76b0SSuyog Pawar                 pu1_src += 4 * src_strd;
340*c83a76b0SSuyog Pawar                 u4_bs = u4_bs >> 2;
341*c83a76b0SSuyog Pawar                 row++;
342*c83a76b0SSuyog Pawar             }
343*c83a76b0SSuyog Pawar 
344*c83a76b0SSuyog Pawar             if((64 == ctb_size) ||
345*c83a76b0SSuyog Pawar                             ((32 == ctb_size) && (col & 1)))
346*c83a76b0SSuyog Pawar             {
347*c83a76b0SSuyog Pawar                 pu4_vert_bs++;
348*c83a76b0SSuyog Pawar             }
349*c83a76b0SSuyog Pawar             pu1_src -= (src_strd << log2_ctb_size);
350*c83a76b0SSuyog Pawar             pu1_src += 8;
351*c83a76b0SSuyog Pawar         }
352*c83a76b0SSuyog Pawar         pu4_vert_bs = pu4_ctb_vert_bs;
353*c83a76b0SSuyog Pawar     }
354*c83a76b0SSuyog Pawar 
355*c83a76b0SSuyog Pawar 
356*c83a76b0SSuyog Pawar     /* Luma Horizontal Edge */
357*c83a76b0SSuyog Pawar 
358*c83a76b0SSuyog Pawar     if(0 == i4_is_last_ctb_y)
359*c83a76b0SSuyog Pawar     {
360*c83a76b0SSuyog Pawar 
361*c83a76b0SSuyog Pawar         /* Left CTB's slice header */
362*c83a76b0SSuyog Pawar         slice_header_t *ps_slice_hdr_left;
363*c83a76b0SSuyog Pawar         {
364*c83a76b0SSuyog Pawar             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
365*c83a76b0SSuyog Pawar             if(i4_is_last_ctb_x)
366*c83a76b0SSuyog Pawar                 cur_ctb_indx += 1;
367*c83a76b0SSuyog Pawar             ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
368*c83a76b0SSuyog Pawar         }
369*c83a76b0SSuyog Pawar         pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
370*c83a76b0SSuyog Pawar         pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
371*c83a76b0SSuyog Pawar 
372*c83a76b0SSuyog Pawar         /** Deblocking is done on a shifted CTB -
373*c83a76b0SSuyog Pawar          *  Horizontal edge processing is done by shifting the CTB left by four pixels */
374*c83a76b0SSuyog Pawar         pu1_src -= 4;
375*c83a76b0SSuyog Pawar         for(row = 0; row < ctb_size / 8; row++)
376*c83a76b0SSuyog Pawar         {
377*c83a76b0SSuyog Pawar             WORD32 shift = 0;
378*c83a76b0SSuyog Pawar 
379*c83a76b0SSuyog Pawar             /* downshift horz_bs by ctb_size/2 for each row
380*c83a76b0SSuyog Pawar              *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
381*c83a76b0SSuyog Pawar              *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
382*c83a76b0SSuyog Pawar              *  and deblocking is done on 8x8 grid
383*c83a76b0SSuyog Pawar              */
384*c83a76b0SSuyog Pawar             if(6 != log2_ctb_size)
385*c83a76b0SSuyog Pawar                 shift = (row & 1) << (log2_ctb_size - 1);
386*c83a76b0SSuyog Pawar 
387*c83a76b0SSuyog Pawar             /* BS for the row - Last column is excluded and the left column is included*/
388*c83a76b0SSuyog Pawar             u4_bs = (pu4_horz_bs[0] >> shift) << 2;
389*c83a76b0SSuyog Pawar 
390*c83a76b0SSuyog Pawar             if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
391*c83a76b0SSuyog Pawar             {
392*c83a76b0SSuyog Pawar                 /** Picking the last BS of the previous CTB corresponding to the same row
393*c83a76b0SSuyog Pawar                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
394*c83a76b0SSuyog Pawar                 */
395*c83a76b0SSuyog Pawar                 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
396*c83a76b0SSuyog Pawar                 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
397*c83a76b0SSuyog Pawar                 u4_bs |= u4_left_bs & 3;
398*c83a76b0SSuyog Pawar             }
399*c83a76b0SSuyog Pawar 
400*c83a76b0SSuyog Pawar             for(col = 0; col < ctb_size / 4;)
401*c83a76b0SSuyog Pawar             {
402*c83a76b0SSuyog Pawar                 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
403*c83a76b0SSuyog Pawar                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
404*c83a76b0SSuyog Pawar 
405*c83a76b0SSuyog Pawar                 bs_tz = CTZ(u4_bs) >> 1;
406*c83a76b0SSuyog Pawar                 if(0 != bs_tz)
407*c83a76b0SSuyog Pawar                 {
408*c83a76b0SSuyog Pawar                     u4_bs = u4_bs >> (bs_tz << 1);
409*c83a76b0SSuyog Pawar 
410*c83a76b0SSuyog Pawar                     if((col + bs_tz) >= (ctb_size / 4))
411*c83a76b0SSuyog Pawar                         pu1_src += 4 * (ctb_size / 4 - col);
412*c83a76b0SSuyog Pawar                     else
413*c83a76b0SSuyog Pawar                         pu1_src += 4 * bs_tz;
414*c83a76b0SSuyog Pawar 
415*c83a76b0SSuyog Pawar                     col += bs_tz;
416*c83a76b0SSuyog Pawar                     continue;
417*c83a76b0SSuyog Pawar                 }
418*c83a76b0SSuyog Pawar 
419*c83a76b0SSuyog Pawar                 if(0 == col)
420*c83a76b0SSuyog Pawar                 {
421*c83a76b0SSuyog Pawar                     i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
422*c83a76b0SSuyog Pawar                     i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
423*c83a76b0SSuyog Pawar 
424*c83a76b0SSuyog Pawar                     if(0 == row)
425*c83a76b0SSuyog Pawar                     {
426*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[0] ?
427*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
428*c83a76b0SSuyog Pawar                                         pu1_qp[-qp_strd - 1];
429*c83a76b0SSuyog Pawar                     }
430*c83a76b0SSuyog Pawar                     else
431*c83a76b0SSuyog Pawar                     {
432*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[2] ?
433*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8] :
434*c83a76b0SSuyog Pawar                                         pu1_qp[(row - 1) * qp_strd - 1];
435*c83a76b0SSuyog Pawar                     }
436*c83a76b0SSuyog Pawar 
437*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[2] ?
438*c83a76b0SSuyog Pawar                                     pu1_qp[-ctb_size / 8] :
439*c83a76b0SSuyog Pawar                                     pu1_qp[row * qp_strd - 1];
440*c83a76b0SSuyog Pawar                 }
441*c83a76b0SSuyog Pawar                 else
442*c83a76b0SSuyog Pawar                 {
443*c83a76b0SSuyog Pawar                     if(0 == row)
444*c83a76b0SSuyog Pawar                     {
445*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[1] ?
446*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd] :
447*c83a76b0SSuyog Pawar                                         pu1_qp[((col - 1) >> 1) - qp_strd];
448*c83a76b0SSuyog Pawar                     }
449*c83a76b0SSuyog Pawar                     else
450*c83a76b0SSuyog Pawar                     {
451*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[3] ?
452*c83a76b0SSuyog Pawar                                         pu1_qp[0] :
453*c83a76b0SSuyog Pawar                                         pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
454*c83a76b0SSuyog Pawar                     }
455*c83a76b0SSuyog Pawar 
456*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[3] ?
457*c83a76b0SSuyog Pawar                                     pu1_qp[0] :
458*c83a76b0SSuyog Pawar                                     pu1_qp[((col - 1) >> 1) + row * qp_strd];
459*c83a76b0SSuyog Pawar                 }
460*c83a76b0SSuyog Pawar 
461*c83a76b0SSuyog Pawar                 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
462*c83a76b0SSuyog Pawar                 filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
463*c83a76b0SSuyog Pawar                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
464*c83a76b0SSuyog Pawar                 filter_p = !filter_p;
465*c83a76b0SSuyog Pawar                 filter_q = !filter_q;
466*c83a76b0SSuyog Pawar 
467*c83a76b0SSuyog Pawar                 if(filter_p || filter_q)
468*c83a76b0SSuyog Pawar                 {
469*c83a76b0SSuyog Pawar                     DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
470*c83a76b0SSuyog Pawar                                          u4_bs & 3, qp_p, qp_q,
471*c83a76b0SSuyog Pawar                                          ps_slice_hdr->i1_beta_offset_div2,
472*c83a76b0SSuyog Pawar                                          ps_slice_hdr->i1_tc_offset_div2,
473*c83a76b0SSuyog Pawar                                          filter_p, filter_q);
474*c83a76b0SSuyog Pawar                     ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
475*c83a76b0SSuyog Pawar                                                                          u4_bs & 3, qp_p, qp_q,
476*c83a76b0SSuyog Pawar                                                                          i1_beta_offset_div2,
477*c83a76b0SSuyog Pawar                                                                          i1_tc_offset_div2, filter_p, filter_q);
478*c83a76b0SSuyog Pawar                 }
479*c83a76b0SSuyog Pawar 
480*c83a76b0SSuyog Pawar                 pu1_src += 4;
481*c83a76b0SSuyog Pawar                 u4_bs = u4_bs >> 2;
482*c83a76b0SSuyog Pawar                 col++;
483*c83a76b0SSuyog Pawar             }
484*c83a76b0SSuyog Pawar 
485*c83a76b0SSuyog Pawar             if((64 == ctb_size) ||
486*c83a76b0SSuyog Pawar                             ((32 == ctb_size) && (row & 1)))
487*c83a76b0SSuyog Pawar             {
488*c83a76b0SSuyog Pawar                 pu4_horz_bs++;
489*c83a76b0SSuyog Pawar             }
490*c83a76b0SSuyog Pawar             pu1_src -= ctb_size;
491*c83a76b0SSuyog Pawar             pu1_src += (src_strd << 3);
492*c83a76b0SSuyog Pawar         }
493*c83a76b0SSuyog Pawar         pu4_horz_bs = pu4_ctb_horz_bs;
494*c83a76b0SSuyog Pawar     }
495*c83a76b0SSuyog Pawar 
496*c83a76b0SSuyog Pawar 
497*c83a76b0SSuyog Pawar     /* Chroma Vertical Edge */
498*c83a76b0SSuyog Pawar 
499*c83a76b0SSuyog Pawar     if(0 == i4_is_last_ctb_x)
500*c83a76b0SSuyog Pawar     {
501*c83a76b0SSuyog Pawar 
502*c83a76b0SSuyog Pawar         /* Top CTB's slice header */
503*c83a76b0SSuyog Pawar         slice_header_t *ps_slice_hdr_top;
504*c83a76b0SSuyog Pawar         {
505*c83a76b0SSuyog Pawar             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
506*c83a76b0SSuyog Pawar             if(i4_is_last_ctb_y)
507*c83a76b0SSuyog Pawar                 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
508*c83a76b0SSuyog Pawar             ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
509*c83a76b0SSuyog Pawar         }
510*c83a76b0SSuyog Pawar 
511*c83a76b0SSuyog Pawar         pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
512*c83a76b0SSuyog Pawar         pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
513*c83a76b0SSuyog Pawar 
514*c83a76b0SSuyog Pawar         /** Deblocking is done on a shifted CTB -
515*c83a76b0SSuyog Pawar          *  Vertical edge processing is done by shifting the CTB up by four pixels */
516*c83a76b0SSuyog Pawar         pu1_src -= 4 * src_strd;
517*c83a76b0SSuyog Pawar 
518*c83a76b0SSuyog Pawar         for(col = 0; col < ctb_size / 16; col++)
519*c83a76b0SSuyog Pawar         {
520*c83a76b0SSuyog Pawar 
521*c83a76b0SSuyog Pawar             /* BS for the column - Last row is excluded and the top row is included*/
522*c83a76b0SSuyog Pawar             u4_bs = pu4_vert_bs[0] << 2;
523*c83a76b0SSuyog Pawar 
524*c83a76b0SSuyog Pawar             if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
525*c83a76b0SSuyog Pawar             {
526*c83a76b0SSuyog Pawar                 /* Picking the last BS of the previous CTB corresponding to the same column */
527*c83a76b0SSuyog Pawar                 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
528*c83a76b0SSuyog Pawar                 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
529*c83a76b0SSuyog Pawar                 u4_bs |= u4_top_bs & 3;
530*c83a76b0SSuyog Pawar             }
531*c83a76b0SSuyog Pawar 
532*c83a76b0SSuyog Pawar             /* Every alternate boundary strength value is used for chroma */
533*c83a76b0SSuyog Pawar             u4_bs &= 0x22222222;
534*c83a76b0SSuyog Pawar 
535*c83a76b0SSuyog Pawar             for(row = 0; row < ctb_size / 8;)
536*c83a76b0SSuyog Pawar             {
537*c83a76b0SSuyog Pawar                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
538*c83a76b0SSuyog Pawar 
539*c83a76b0SSuyog Pawar                 bs_tz = CTZ(u4_bs) >> 2;
540*c83a76b0SSuyog Pawar                 if(0 != bs_tz)
541*c83a76b0SSuyog Pawar                 {
542*c83a76b0SSuyog Pawar                     if((row + bs_tz) >= (ctb_size / 8))
543*c83a76b0SSuyog Pawar                         pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
544*c83a76b0SSuyog Pawar                     else
545*c83a76b0SSuyog Pawar                         pu1_src += 4 * bs_tz  * src_strd;
546*c83a76b0SSuyog Pawar                     row += bs_tz;
547*c83a76b0SSuyog Pawar                     u4_bs = u4_bs >> (bs_tz << 2);
548*c83a76b0SSuyog Pawar                     continue;
549*c83a76b0SSuyog Pawar                 }
550*c83a76b0SSuyog Pawar 
551*c83a76b0SSuyog Pawar                 if(0 == row)
552*c83a76b0SSuyog Pawar                 {
553*c83a76b0SSuyog Pawar                     i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
554*c83a76b0SSuyog Pawar 
555*c83a76b0SSuyog Pawar                     if(0 == col)
556*c83a76b0SSuyog Pawar                     {
557*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[0] ?
558*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
559*c83a76b0SSuyog Pawar                                         pu1_qp[-qp_strd - 1];
560*c83a76b0SSuyog Pawar                     }
561*c83a76b0SSuyog Pawar                     else
562*c83a76b0SSuyog Pawar                     {
563*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[1] ?
564*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd] :
565*c83a76b0SSuyog Pawar                                         pu1_qp[2 * col - 1 - qp_strd];
566*c83a76b0SSuyog Pawar                     }
567*c83a76b0SSuyog Pawar 
568*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[1] ?
569*c83a76b0SSuyog Pawar                                     pu1_qp[-ctb_size / 8 * qp_strd] :
570*c83a76b0SSuyog Pawar                                     pu1_qp[2 * col - qp_strd];
571*c83a76b0SSuyog Pawar                 }
572*c83a76b0SSuyog Pawar                 else
573*c83a76b0SSuyog Pawar                 {
574*c83a76b0SSuyog Pawar                     if(0 == col)
575*c83a76b0SSuyog Pawar                     {
576*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[2] ?
577*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8] :
578*c83a76b0SSuyog Pawar                                         pu1_qp[(row - 1) * qp_strd - 1];
579*c83a76b0SSuyog Pawar                     }
580*c83a76b0SSuyog Pawar                     else
581*c83a76b0SSuyog Pawar                     {
582*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[3] ?
583*c83a76b0SSuyog Pawar                                         pu1_qp[0] :
584*c83a76b0SSuyog Pawar                                         pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
585*c83a76b0SSuyog Pawar                     }
586*c83a76b0SSuyog Pawar 
587*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[3] ?
588*c83a76b0SSuyog Pawar                                     pu1_qp[0] :
589*c83a76b0SSuyog Pawar                                     pu1_qp[(row - 1) * qp_strd + 2 * col];
590*c83a76b0SSuyog Pawar                 }
591*c83a76b0SSuyog Pawar 
592*c83a76b0SSuyog Pawar                 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
593*c83a76b0SSuyog Pawar                 filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
594*c83a76b0SSuyog Pawar                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
595*c83a76b0SSuyog Pawar                 filter_p = !filter_p;
596*c83a76b0SSuyog Pawar                 filter_q = !filter_q;
597*c83a76b0SSuyog Pawar 
598*c83a76b0SSuyog Pawar                 if(filter_p || filter_q)
599*c83a76b0SSuyog Pawar                 {
600*c83a76b0SSuyog Pawar                     ASSERT(1 == ((u4_bs & 3) >> 1));
601*c83a76b0SSuyog Pawar                     DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
602*c83a76b0SSuyog Pawar                                            u4_bs & 3, qp_p, qp_q,
603*c83a76b0SSuyog Pawar                                            ps_pps->i1_pic_cb_qp_offset,
604*c83a76b0SSuyog Pawar                                            ps_pps->i1_pic_cr_qp_offset,
605*c83a76b0SSuyog Pawar                                            ps_slice_hdr->i1_tc_offset_div2,
606*c83a76b0SSuyog Pawar                                            filter_p, filter_q);
607*c83a76b0SSuyog Pawar                     if(chroma_yuv420sp_vu)
608*c83a76b0SSuyog Pawar                     {
609*c83a76b0SSuyog Pawar                         ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
610*c83a76b0SSuyog Pawar                                                                                src_strd,
611*c83a76b0SSuyog Pawar                                                                                qp_q,
612*c83a76b0SSuyog Pawar                                                                                qp_p,
613*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cr_qp_offset,
614*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cb_qp_offset,
615*c83a76b0SSuyog Pawar                                                                                i1_tc_offset_div2,
616*c83a76b0SSuyog Pawar                                                                                filter_q,
617*c83a76b0SSuyog Pawar                                                                                filter_p);
618*c83a76b0SSuyog Pawar                     }
619*c83a76b0SSuyog Pawar                     else
620*c83a76b0SSuyog Pawar                     {
621*c83a76b0SSuyog Pawar                         ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
622*c83a76b0SSuyog Pawar                                                                                src_strd,
623*c83a76b0SSuyog Pawar                                                                                qp_p,
624*c83a76b0SSuyog Pawar                                                                                qp_q,
625*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cb_qp_offset,
626*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cr_qp_offset,
627*c83a76b0SSuyog Pawar                                                                                i1_tc_offset_div2,
628*c83a76b0SSuyog Pawar                                                                                filter_p,
629*c83a76b0SSuyog Pawar                                                                                filter_q);
630*c83a76b0SSuyog Pawar                     }
631*c83a76b0SSuyog Pawar                 }
632*c83a76b0SSuyog Pawar 
633*c83a76b0SSuyog Pawar                 pu1_src += 4 * src_strd;
634*c83a76b0SSuyog Pawar                 u4_bs = u4_bs >> 4;
635*c83a76b0SSuyog Pawar                 row++;
636*c83a76b0SSuyog Pawar             }
637*c83a76b0SSuyog Pawar 
638*c83a76b0SSuyog Pawar             pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
639*c83a76b0SSuyog Pawar             pu1_src -= ((src_strd / 2) << log2_ctb_size);
640*c83a76b0SSuyog Pawar             pu1_src += 16;
641*c83a76b0SSuyog Pawar         }
642*c83a76b0SSuyog Pawar     }
643*c83a76b0SSuyog Pawar 
644*c83a76b0SSuyog Pawar     /* Chroma Horizontal Edge */
645*c83a76b0SSuyog Pawar 
646*c83a76b0SSuyog Pawar     if(0 == i4_is_last_ctb_y)
647*c83a76b0SSuyog Pawar     {
648*c83a76b0SSuyog Pawar 
649*c83a76b0SSuyog Pawar         /* Left CTB's slice header */
650*c83a76b0SSuyog Pawar         slice_header_t *ps_slice_hdr_left;
651*c83a76b0SSuyog Pawar         {
652*c83a76b0SSuyog Pawar             WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
653*c83a76b0SSuyog Pawar             if(i4_is_last_ctb_x)
654*c83a76b0SSuyog Pawar                 cur_ctb_indx += 1;
655*c83a76b0SSuyog Pawar             ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
656*c83a76b0SSuyog Pawar         }
657*c83a76b0SSuyog Pawar 
658*c83a76b0SSuyog Pawar         pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
659*c83a76b0SSuyog Pawar         pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
660*c83a76b0SSuyog Pawar 
661*c83a76b0SSuyog Pawar         /** Deblocking is done on a shifted CTB -
662*c83a76b0SSuyog Pawar          * Horizontal edge processing is done by shifting the CTB left by four pixels (8 here because UV are interleaved) */
663*c83a76b0SSuyog Pawar         pu1_src -= 8;
664*c83a76b0SSuyog Pawar         for(row = 0; row < ctb_size / 16; row++)
665*c83a76b0SSuyog Pawar         {
666*c83a76b0SSuyog Pawar             /* BS for the row - Last column is excluded and the left column is included*/
667*c83a76b0SSuyog Pawar             u4_bs = pu4_horz_bs[0] << 2;
668*c83a76b0SSuyog Pawar 
669*c83a76b0SSuyog Pawar             if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
670*c83a76b0SSuyog Pawar             {
671*c83a76b0SSuyog Pawar                 /** Picking the last BS of the previous CTB corresponding to the same row
672*c83a76b0SSuyog Pawar                 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
673*c83a76b0SSuyog Pawar                 */
674*c83a76b0SSuyog Pawar                 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
675*c83a76b0SSuyog Pawar                 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
676*c83a76b0SSuyog Pawar                 u4_bs |= u4_left_bs & 3;
677*c83a76b0SSuyog Pawar             }
678*c83a76b0SSuyog Pawar 
679*c83a76b0SSuyog Pawar             /* Every alternate boundary strength value is used for chroma */
680*c83a76b0SSuyog Pawar             u4_bs &= 0x22222222;
681*c83a76b0SSuyog Pawar 
682*c83a76b0SSuyog Pawar             for(col = 0; col < ctb_size / 8;)
683*c83a76b0SSuyog Pawar             {
684*c83a76b0SSuyog Pawar                 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
685*c83a76b0SSuyog Pawar 
686*c83a76b0SSuyog Pawar                 bs_tz = CTZ(u4_bs) >> 2;
687*c83a76b0SSuyog Pawar                 if(0 != bs_tz)
688*c83a76b0SSuyog Pawar                 {
689*c83a76b0SSuyog Pawar                     u4_bs = u4_bs >> (bs_tz << 2);
690*c83a76b0SSuyog Pawar 
691*c83a76b0SSuyog Pawar                     if((col + bs_tz) >= (ctb_size / 8))
692*c83a76b0SSuyog Pawar                         pu1_src += 8 * (ctb_size / 8 - col);
693*c83a76b0SSuyog Pawar                     else
694*c83a76b0SSuyog Pawar                         pu1_src += 8 * bs_tz;
695*c83a76b0SSuyog Pawar 
696*c83a76b0SSuyog Pawar                     col += bs_tz;
697*c83a76b0SSuyog Pawar                     continue;
698*c83a76b0SSuyog Pawar                 }
699*c83a76b0SSuyog Pawar 
700*c83a76b0SSuyog Pawar                 if(0 == col)
701*c83a76b0SSuyog Pawar                 {
702*c83a76b0SSuyog Pawar                     i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
703*c83a76b0SSuyog Pawar 
704*c83a76b0SSuyog Pawar                     if(0 == row)
705*c83a76b0SSuyog Pawar                     {
706*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[0] ?
707*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
708*c83a76b0SSuyog Pawar                                         pu1_qp[-qp_strd - 1];
709*c83a76b0SSuyog Pawar                     }
710*c83a76b0SSuyog Pawar                     else
711*c83a76b0SSuyog Pawar                     {
712*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[2] ?
713*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8] :
714*c83a76b0SSuyog Pawar                                         pu1_qp[(2 * row - 1) * qp_strd - 1];
715*c83a76b0SSuyog Pawar                     }
716*c83a76b0SSuyog Pawar 
717*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[2] ?
718*c83a76b0SSuyog Pawar                                     pu1_qp[-ctb_size / 8] :
719*c83a76b0SSuyog Pawar                                     pu1_qp[(2 * row) * qp_strd - 1];
720*c83a76b0SSuyog Pawar                 }
721*c83a76b0SSuyog Pawar                 else
722*c83a76b0SSuyog Pawar                 {
723*c83a76b0SSuyog Pawar                     if(0 == row)
724*c83a76b0SSuyog Pawar                     {
725*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[1] ?
726*c83a76b0SSuyog Pawar                                         pu1_qp[-ctb_size / 8 * qp_strd] :
727*c83a76b0SSuyog Pawar                                         pu1_qp[col - 1 - qp_strd];
728*c83a76b0SSuyog Pawar                     }
729*c83a76b0SSuyog Pawar                     else
730*c83a76b0SSuyog Pawar                     {
731*c83a76b0SSuyog Pawar                         qp_p = u4_qp_const_in_ctb[3] ?
732*c83a76b0SSuyog Pawar                                         pu1_qp[0] :
733*c83a76b0SSuyog Pawar                                         pu1_qp[(col - 1) +  (2 * row - 1) * qp_strd];
734*c83a76b0SSuyog Pawar                     }
735*c83a76b0SSuyog Pawar 
736*c83a76b0SSuyog Pawar                     qp_q = u4_qp_const_in_ctb[3] ?
737*c83a76b0SSuyog Pawar                                     pu1_qp[0] :
738*c83a76b0SSuyog Pawar                                     pu1_qp[(col - 1) + 2 * row * qp_strd];
739*c83a76b0SSuyog Pawar                 }
740*c83a76b0SSuyog Pawar 
741*c83a76b0SSuyog Pawar                 filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
742*c83a76b0SSuyog Pawar                 filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
743*c83a76b0SSuyog Pawar                 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
744*c83a76b0SSuyog Pawar                 filter_p = !filter_p;
745*c83a76b0SSuyog Pawar                 filter_q = !filter_q;
746*c83a76b0SSuyog Pawar 
747*c83a76b0SSuyog Pawar                 if(filter_p || filter_q)
748*c83a76b0SSuyog Pawar                 {
749*c83a76b0SSuyog Pawar                     ASSERT(1 == ((u4_bs & 3) >> 1));
750*c83a76b0SSuyog Pawar                     DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
751*c83a76b0SSuyog Pawar                                            u4_bs & 3, qp_p, qp_q,
752*c83a76b0SSuyog Pawar                                            ps_pps->i1_pic_cb_qp_offset,
753*c83a76b0SSuyog Pawar                                            ps_pps->i1_pic_cr_qp_offset,
754*c83a76b0SSuyog Pawar                                            ps_slice_hdr->i1_tc_offset_div2,
755*c83a76b0SSuyog Pawar                                            filter_p, filter_q);
756*c83a76b0SSuyog Pawar                     if(chroma_yuv420sp_vu)
757*c83a76b0SSuyog Pawar                     {
758*c83a76b0SSuyog Pawar                         ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
759*c83a76b0SSuyog Pawar                                                                                src_strd,
760*c83a76b0SSuyog Pawar                                                                                qp_q,
761*c83a76b0SSuyog Pawar                                                                                qp_p,
762*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cr_qp_offset,
763*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cb_qp_offset,
764*c83a76b0SSuyog Pawar                                                                                i1_tc_offset_div2,
765*c83a76b0SSuyog Pawar                                                                                filter_q,
766*c83a76b0SSuyog Pawar                                                                                filter_p);
767*c83a76b0SSuyog Pawar                     }
768*c83a76b0SSuyog Pawar                     else
769*c83a76b0SSuyog Pawar                     {
770*c83a76b0SSuyog Pawar                         ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
771*c83a76b0SSuyog Pawar                                                                                src_strd,
772*c83a76b0SSuyog Pawar                                                                                qp_p,
773*c83a76b0SSuyog Pawar                                                                                qp_q,
774*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cb_qp_offset,
775*c83a76b0SSuyog Pawar                                                                                ps_pps->i1_pic_cr_qp_offset,
776*c83a76b0SSuyog Pawar                                                                                i1_tc_offset_div2,
777*c83a76b0SSuyog Pawar                                                                                filter_p,
778*c83a76b0SSuyog Pawar                                                                                filter_q);
779*c83a76b0SSuyog Pawar                     }
780*c83a76b0SSuyog Pawar                 }
781*c83a76b0SSuyog Pawar 
782*c83a76b0SSuyog Pawar                 pu1_src += 8;
783*c83a76b0SSuyog Pawar                 u4_bs = u4_bs >> 4;
784*c83a76b0SSuyog Pawar                 col++;
785*c83a76b0SSuyog Pawar             }
786*c83a76b0SSuyog Pawar 
787*c83a76b0SSuyog Pawar             pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
788*c83a76b0SSuyog Pawar             pu1_src -= ctb_size;
789*c83a76b0SSuyog Pawar             pu1_src += 8 * src_strd;
790*c83a76b0SSuyog Pawar 
791*c83a76b0SSuyog Pawar         }
792*c83a76b0SSuyog Pawar     }
793*c83a76b0SSuyog Pawar }
794