1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar *******************************************************************************
20*c83a76b0SSuyog Pawar * @file
21*c83a76b0SSuyog Pawar * ihevc_sao.c
22*c83a76b0SSuyog Pawar *
23*c83a76b0SSuyog Pawar * @brief
24*c83a76b0SSuyog Pawar * Contains leaf level function definitions for sample adaptive offset process
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar * @author
27*c83a76b0SSuyog Pawar * Srinivas T
28*c83a76b0SSuyog Pawar *
29*c83a76b0SSuyog Pawar * @par List of Functions:
30*c83a76b0SSuyog Pawar * - ihevc_sao_band_offset_luma()
31*c83a76b0SSuyog Pawar * - ihevc_sao_band_offset_chroma()
32*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class0()
33*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class0_chroma()
34*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class1()
35*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class1_chroma()
36*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class2()
37*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class2_chroma()
38*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class3()
39*c83a76b0SSuyog Pawar * - ihevc_sao_edge_offset_class3_chroma()
40*c83a76b0SSuyog Pawar * @remarks
41*c83a76b0SSuyog Pawar * None
42*c83a76b0SSuyog Pawar *
43*c83a76b0SSuyog Pawar *******************************************************************************
44*c83a76b0SSuyog Pawar */
45*c83a76b0SSuyog Pawar #include <stdlib.h>
46*c83a76b0SSuyog Pawar #include <assert.h>
47*c83a76b0SSuyog Pawar #include <string.h>
48*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
49*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
50*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
51*c83a76b0SSuyog Pawar #include "ihevc_func_selector.h"
52*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
53*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
54*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
55*c83a76b0SSuyog Pawar
/* Number of entries in the per-call band lookup tables built by the
 * band-offset functions (indexed by sample >> (bit depth - 5)). */
#define NUM_BAND_TABLE 32

/* Maps the raw edge category (2 + sign towards first neighbour + sign towards
 * second neighbour, i.e. 0..4) to the index used into the SAO offset array.
 * A mapped value of 0 means the sample is left unmodified by the edge-offset
 * functions. */
const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
/**
* au4_avail is an array of flags - one for each neighboring block - specifying
* whether that block is available. The functions in this file receive these
* flags through their pu1_avail parameter.
* au4_avail[0] - left
* au4_avail[1] - right
* au4_avail[2] - top
* au4_avail[3] - bottom
* au4_avail[4] - top-left
* au4_avail[5] - top-right
* au4_avail[6] - bottom-left
* au4_avail[7] - bottom-right
*/
70*c83a76b0SSuyog Pawar
71*c83a76b0SSuyog Pawar
/**
 * @brief
 *  SAO band offset for a wd x ht block of luma samples, applied in place.
 *
 * @par Description:
 *  Neighbour buffers are refreshed from the unfiltered block first:
 *  pu1_src_left receives the right-most column, pu1_src_top_left receives
 *  pu1_src_top[wd - 1] (read before pu1_src_top is overwritten) and
 *  pu1_src_top receives the bottom row.
 *  Each sample is then classified as (sample >> (BIT_DEPTH_LUMA - 5)). The four
 *  consecutive bands starting at sao_band_pos (wrapping modulo 32) select
 *  pi1_sao_offset[1..4]; every other band selects pi1_sao_offset[0].
 *  The sum is clipped to [0, (1 << BIT_DEPTH_LUMA) - 1].
 *
 * @param[in,out] pu1_src          Top-left sample of the block
 * @param[in]     src_strd         Distance between rows of pu1_src
 * @param[out]    pu1_src_left     ht entries: unfiltered right-most column
 * @param[in,out] pu1_src_top      wd entries: replaced by unfiltered bottom row
 * @param[out]    pu1_src_top_left One entry: previous pu1_src_top[wd - 1]
 * @param[in]     sao_band_pos     First of the four offset bands
 * @param[in]     pi1_sao_offset   Offsets, indexed 0..4
 * @param[in]     wd               Block width in samples
 * @param[in]     ht               Block height in rows
 */
void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
                                WORD32 src_strd,
                                UWORD8 *pu1_src_left,
                                UWORD8 *pu1_src_top,
                                UWORD8 *pu1_src_top_left,
                                WORD32 sao_band_pos,
                                WORD8 *pi1_sao_offset,
                                WORD32 wd,
                                WORD32 ht)
{
    const WORD32 i4_shift = BIT_DEPTH_LUMA - 5;
    const WORD32 i4_max_val = (1 << (i4_shift + 5)) - 1;
    WORD32 ai4_band_lut[NUM_BAND_TABLE];
    WORD32 x, y, k;

    /* Save the unfiltered neighbours: right column, then top-left, then bottom row */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + (wd - 1)];
    }
    *pu1_src_top_left = pu1_src_top[wd - 1];
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Band -> offset index table: 0 everywhere except the four selected bands */
    for(k = 0; k < NUM_BAND_TABLE; k++)
    {
        ai4_band_lut[k] = 0;
    }
    for(k = 0; k < 4; k++)
    {
        ai4_band_lut[(k + sao_band_pos) & 31] = k + 1;
    }

    /* Add the selected offset to every sample and clip to the valid range */
    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;

        for(x = 0; x < wd; x++)
        {
            const WORD32 i4_offset_idx = ai4_band_lut[pu1_row[x] >> i4_shift];
            const WORD32 i4_sum = pu1_row[x] + pi1_sao_offset[i4_offset_idx];

            pu1_row[x] = CLIP3(i4_sum, 0, i4_max_val);
        }
    }
}
120*c83a76b0SSuyog Pawar
121*c83a76b0SSuyog Pawar
122*c83a76b0SSuyog Pawar
123*c83a76b0SSuyog Pawar /* input 'wd' has to be for the interleaved block and not for each color component */
/**
 * @brief
 *  SAO band offset for an interleaved chroma block, applied in place.
 *
 * @par Description:
 *  Even columns are treated as U and odd columns as V; 'wd' counts the
 *  interleaved samples of a row. Neighbour buffers are refreshed from the
 *  unfiltered block first: pu1_src_left receives the last two columns of each
 *  row (2 entries per row), pu1_src_top_left receives pu1_src_top[wd - 2] and
 *  pu1_src_top[wd - 1] (read before pu1_src_top is overwritten) and
 *  pu1_src_top receives the bottom row.
 *  Each sample is classified as (sample >> (BIT_DEPTH_CHROMA - 5)). For each
 *  plane the four consecutive bands starting at that plane's band position
 *  (wrapping modulo 32) select offsets 1..4 of that plane's offset array;
 *  every other band selects entry 0. The sum is clipped to
 *  [0, (1 << BIT_DEPTH_CHROMA) - 1].
 *
 * @param[in,out] pu1_src          Top-left sample of the interleaved block
 * @param[in]     src_strd         Distance between rows of pu1_src
 * @param[out]    pu1_src_left     2 * ht entries: unfiltered last two columns
 * @param[in,out] pu1_src_top      wd entries: replaced by unfiltered bottom row
 * @param[out]    pu1_src_top_left Two entries: previous last two top entries
 * @param[in]     sao_band_pos_u   First of the four offset bands for U
 * @param[in]     sao_band_pos_v   First of the four offset bands for V
 * @param[in]     pi1_sao_offset_u U offsets, indexed 0..4
 * @param[in]     pi1_sao_offset_v V offsets, indexed 0..4
 * @param[in]     wd               Row width in interleaved samples
 * @param[in]     ht               Block height in rows
 */
void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  WORD32 sao_band_pos_u,
                                  WORD32 sao_band_pos_v,
                                  WORD8 *pi1_sao_offset_u,
                                  WORD8 *pi1_sao_offset_v,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 i4_shift = BIT_DEPTH_CHROMA - 5;
    const WORD32 i4_max_val = (1 << (i4_shift + 5)) - 1;
    /* Index 0 is the U plane (even columns), index 1 the V plane (odd columns) */
    WORD32 ai4_band_lut[2][NUM_BAND_TABLE];
    WORD8 *api1_offset[2];
    WORD32 x, y, k;

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* Save the unfiltered neighbours: last U/V pair of each row, top-left, bottom row */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[2 * y] = pu1_src[y * src_strd + (wd - 2)];
        pu1_src_left[2 * y + 1] = pu1_src[y * src_strd + (wd - 1)];
    }
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Band -> offset index tables: 0 everywhere except the four selected bands */
    for(k = 0; k < NUM_BAND_TABLE; k++)
    {
        ai4_band_lut[0][k] = 0;
        ai4_band_lut[1][k] = 0;
    }
    for(k = 0; k < 4; k++)
    {
        ai4_band_lut[0][(k + sao_band_pos_u) & 31] = k + 1;
        ai4_band_lut[1][(k + sao_band_pos_v) & 31] = k + 1;
    }

    /* Add the selected offset to every sample and clip to the valid range */
    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;

        for(x = 0; x < wd; x++)
        {
            const WORD32 i4_plane = x & 1;
            const WORD32 i4_offset_idx = ai4_band_lut[i4_plane][pu1_row[x] >> i4_shift];
            const WORD32 i4_sum = pu1_row[x] + api1_offset[i4_plane][i4_offset_idx];

            pu1_row[x] = CLIP3(i4_sum, 0, i4_max_val);
        }
    }
}
182*c83a76b0SSuyog Pawar
183*c83a76b0SSuyog Pawar
184*c83a76b0SSuyog Pawar
185*c83a76b0SSuyog Pawar /* Horizontal filtering */
/**
 * @brief
 *  SAO edge offset, class 0 (left/right neighbours), for a luma block.
 *
 * @par Description:
 *  Filtering is done in place, left to right within each row. Each sample is
 *  compared with its left neighbour (pu1_src_left[row] for column 0) and with
 *  its not-yet-filtered right neighbour; the two signs select an entry of
 *  gi4_ihevc_table_edge_idx and, when that entry is non-zero, the matching
 *  pi1_sao_offset value is added and the result clipped to
 *  [0, (1 << BIT_DEPTH_LUMA) - 1].
 *  Column 0 is skipped when pu1_avail[0] is 0 and column wd - 1 is skipped
 *  when pu1_avail[1] is 0.
 *  pu1_src_top_left and pu1_src_top are refreshed before filtering
 *  (pu1_src_top[wd - 1] and the unfiltered bottom row respectively);
 *  pu1_src_left is refreshed with the unfiltered right-most column only after
 *  filtering, because its old contents are needed as the left neighbours.
 *  Note that the right neighbour of column wd - 1 is read from pu1_src[wd].
 *
 * @param[in,out] pu1_src           Top-left sample of the block
 * @param[in]     src_strd          Distance between rows of pu1_src
 * @param[in,out] pu1_src_left      ht entries: left neighbours in, unfiltered
 *                                  right-most column out
 * @param[in,out] pu1_src_top       wd entries: replaced by unfiltered bottom row
 * @param[out]    pu1_src_top_left  One entry: previous pu1_src_top[wd - 1]
 * @param[in]     pu1_src_top_right Unused
 * @param[in]     pu1_src_bot_left  Unused
 * @param[in]     pu1_avail         Availability flags; [0] and [1] are read
 * @param[in]     pi1_sao_offset    Offsets, indexed by mapped edge index
 * @param[in]     wd                Block width in samples
 * @param[in]     ht                Block height in rows
 */
void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 i4_max_val = (1 << BIT_DEPTH_LUMA) - 1;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_col[MAX_CTB_SIZE];
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    /* Every column is enabled until the availability flags say otherwise */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture the unfiltered neighbours for later blocks */
    pu1_src_top_left[0] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_col[y] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Disable the boundary columns whose horizontal neighbour is unavailable */
    if(0 == pu1_avail[0])
    {
        au1_col_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_col_mask[wd - 1] = 0;
    }

    /* Filter in place, one row at a time */
    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;
        WORD32 i4_sign_prev = SIGN(pu1_row[0] - pu1_src_left[y]);

        for(x = 0; x < wd; x++)
        {
            const WORD32 i4_sign_next = SIGN(pu1_row[x] - pu1_row[x + 1]);
            const WORD32 i4_category = 2 + i4_sign_prev + i4_sign_next;
            const WORD32 i4_edge_idx = gi4_ihevc_table_edge_idx[i4_category] & au1_col_mask[x];

            /* Sign seen from the next sample is the negation of this one;
             * it is derived from the unfiltered value of the current sample */
            i4_sign_prev = -i4_sign_next;

            if(0 != i4_edge_idx)
            {
                pu1_row[x] = CLIP3(pu1_row[x] + pi1_sao_offset[i4_edge_idx], 0, i4_max_val);
            }
        }
    }

    /* Old left neighbours are no longer needed: publish the saved column */
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = au1_right_col[y];
    }
}
263*c83a76b0SSuyog Pawar
264*c83a76b0SSuyog Pawar
265*c83a76b0SSuyog Pawar
266*c83a76b0SSuyog Pawar
267*c83a76b0SSuyog Pawar /* input 'wd' has to be for the interleaved block and not for each color component */
/**
 * @brief
 *  SAO edge offset, class 0 (left/right neighbours), for interleaved chroma.
 *
 * @par Description:
 *  Even columns are treated as U and odd columns as V; 'wd' counts the
 *  interleaved samples of a row, so the horizontal neighbours of a sample are
 *  two columns away. Filtering is done in place, left to right within each
 *  row. The left neighbours of the first U/V pair come from
 *  pu1_src_left[2 * row] and pu1_src_left[2 * row + 1]. The two signs select
 *  an entry of gi4_ihevc_table_edge_idx and, when that entry is non-zero, the
 *  matching U or V offset is added and the result clipped to
 *  [0, (1 << BIT_DEPTH_CHROMA) - 1].
 *  The first U/V pair is skipped when pu1_avail[0] is 0 and the last pair is
 *  skipped when pu1_avail[1] is 0.
 *  pu1_src_top_left and pu1_src_top are refreshed before filtering;
 *  pu1_src_left is refreshed with the unfiltered last two columns only after
 *  filtering, because its old contents are needed as the left neighbours.
 *  Note that the right neighbours of the last pair are read from
 *  pu1_src[wd] and pu1_src[wd + 1].
 *
 * @param[in,out] pu1_src           Top-left sample of the interleaved block
 * @param[in]     src_strd          Distance between rows of pu1_src
 * @param[in,out] pu1_src_left      2 * ht entries: left neighbours in,
 *                                  unfiltered last two columns out
 * @param[in,out] pu1_src_top       wd entries: replaced by unfiltered bottom row
 * @param[out]    pu1_src_top_left  Two entries: previous last two top entries
 * @param[in]     pu1_src_top_right Unused
 * @param[in]     pu1_src_bot_left  Unused
 * @param[in]     pu1_avail         Availability flags; [0] and [1] are read
 * @param[in]     pi1_sao_offset_u  U offsets, indexed by mapped edge index
 * @param[in]     pi1_sao_offset_v  V offsets, indexed by mapped edge index
 * @param[in]     wd                Row width in interleaved samples
 * @param[in]     ht                Block height in rows
 */
void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    const WORD32 i4_max_val = (1 << BIT_DEPTH_CHROMA) - 1;
    /* One mask entry per U/V pair */
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_right_cols[2 * MAX_CTB_SIZE];
    /* Index 0 is the U plane (even columns), index 1 the V plane (odd columns) */
    WORD8 *api1_offset[2];
    WORD32 ai4_sign_prev[2];
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* Every pair is enabled until the availability flags say otherwise */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture the unfiltered neighbours for later blocks */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        au1_right_cols[2 * y] = pu1_src[y * src_strd + wd - 2];
        au1_right_cols[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Disable the boundary pairs whose horizontal neighbour is unavailable */
    if(0 == pu1_avail[0])
    {
        au1_pair_mask[0] = 0;
    }
    if(0 == pu1_avail[1])
    {
        au1_pair_mask[(wd - 1) >> 1] = 0;
    }

    /* Filter in place, one row at a time */
    for(y = 0; y < ht; y++)
    {
        UWORD8 *pu1_row = pu1_src + y * src_strd;

        ai4_sign_prev[0] = SIGN(pu1_row[0] - pu1_src_left[2 * y]);
        ai4_sign_prev[1] = SIGN(pu1_row[1] - pu1_src_left[2 * y + 1]);

        for(x = 0; x < wd; x++)
        {
            const WORD32 i4_plane = x & 1;
            const WORD32 i4_sign_next = SIGN(pu1_row[x] - pu1_row[x + 2]);
            const WORD32 i4_category = 2 + ai4_sign_prev[i4_plane] + i4_sign_next;
            const WORD32 i4_edge_idx = gi4_ihevc_table_edge_idx[i4_category] & au1_pair_mask[x >> 1];

            /* Sign seen from the next sample of the same plane is the
             * negation of this one (based on the unfiltered current sample) */
            ai4_sign_prev[i4_plane] = -i4_sign_next;

            if(0 != i4_edge_idx)
            {
                pu1_row[x] = CLIP3(pu1_row[x] + api1_offset[i4_plane][i4_edge_idx], 0, i4_max_val);
            }
        }
    }

    /* Old left neighbours are no longer needed: publish the saved columns */
    for(y = 0; y < 2 * ht; y++)
    {
        pu1_src_left[y] = au1_right_cols[y];
    }
}
361*c83a76b0SSuyog Pawar
362*c83a76b0SSuyog Pawar
363*c83a76b0SSuyog Pawar
364*c83a76b0SSuyog Pawar /* Vertical filtering */
/**
 * @brief
 *  SAO edge offset, class 1 (top/bottom neighbours), for a luma block.
 *
 * @par Description:
 *  Filtering is done in place, top to bottom. Each sample is compared with
 *  the sample above it (pu1_src_top for the first row when the top is
 *  available) and with the not-yet-filtered sample below it; the two signs
 *  select an entry of gi4_ihevc_table_edge_idx and, when that entry is
 *  non-zero, the matching pi1_sao_offset value is added and the result clipped
 *  to [0, (1 << BIT_DEPTH_LUMA) - 1].
 *  When pu1_avail[2] is 0 the first row is not filtered (it only serves as
 *  the "above" row of the second row); when pu1_avail[3] is 0 the last row is
 *  not filtered.
 *  pu1_src_top_left and pu1_src_left are refreshed before filtering
 *  (pu1_src_top[wd - 1] and the unfiltered right-most column respectively);
 *  pu1_src_top is refreshed with the unfiltered bottom row only after
 *  filtering, because its old contents are needed as the top neighbours.
 *
 * @param[in,out] pu1_src           Top-left sample of the block
 * @param[in]     src_strd          Distance between rows of pu1_src
 * @param[out]    pu1_src_left      ht entries: unfiltered right-most column
 * @param[in,out] pu1_src_top       wd entries: top neighbours in, unfiltered
 *                                  bottom row out
 * @param[out]    pu1_src_top_left  One entry: previous pu1_src_top[wd - 1]
 * @param[in]     pu1_src_top_right Unused
 * @param[in]     pu1_src_bot_left  Unused
 * @param[in]     pu1_avail         Availability flags; [2] and [3] are read
 * @param[in]     pi1_sao_offset    Offsets, indexed by mapped edge index
 * @param[in]     wd                Block width in samples
 * @param[in]     ht                Block height in rows
 */
void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    const WORD32 i4_max_val = (1 << BIT_DEPTH_LUMA) - 1;
    UWORD8 au1_col_mask[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_above[MAX_CTB_SIZE];
    UWORD8 *pu1_first_row;
    UWORD8 *pu1_above_row;
    WORD32 i4_num_rows;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    /* No column is ever disabled in this function, so the mask stays all ones */
    memset(au1_col_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture the unfiltered neighbours for later blocks */
    pu1_src_top_left[0] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[y] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Pick the first row to filter and the row that sits above it */
    i4_num_rows = ht;
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: skip row 0 and use it as the "above" row */
        pu1_above_row = pu1_src;
        pu1_first_row = pu1_src + src_strd;
        i4_num_rows--;
    }
    else
    {
        pu1_above_row = pu1_src_top;
        pu1_first_row = pu1_src;
    }
    for(x = 0; x < wd; x++)
    {
        ai1_sign_above[x] = SIGN(pu1_first_row[x] - pu1_above_row[x]);
    }

    /* Bottom unavailable: leave the last row untouched */
    if(0 == pu1_avail[3])
    {
        i4_num_rows--;
    }

    /* Filter in place, one row at a time */
    for(y = 0; y < i4_num_rows; y++)
    {
        UWORD8 *pu1_row = pu1_first_row + y * src_strd;

        for(x = 0; x < wd; x++)
        {
            const WORD32 i4_sign_below = SIGN(pu1_row[x] - pu1_row[x + src_strd]);
            const WORD32 i4_category = 2 + ai1_sign_above[x] + i4_sign_below;
            const WORD32 i4_edge_idx = gi4_ihevc_table_edge_idx[i4_category] & au1_col_mask[x];

            /* Sign seen from the row below is the negation of this one
             * (based on the unfiltered current sample) */
            ai1_sign_above[x] = -i4_sign_below;

            if(0 != i4_edge_idx)
            {
                pu1_row[x] = CLIP3(pu1_row[x] + pi1_sao_offset[i4_edge_idx], 0, i4_max_val);
            }
        }
    }

    /* Old top neighbours are no longer needed: publish the saved bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
454*c83a76b0SSuyog Pawar
455*c83a76b0SSuyog Pawar
456*c83a76b0SSuyog Pawar
457*c83a76b0SSuyog Pawar /* input 'wd' has to be for the interleaved block and not for each color component */
/**
 * @brief
 *  SAO edge offset, class 1 (top/bottom neighbours), for interleaved chroma.
 *
 * @par Description:
 *  Even columns are treated as U and odd columns as V; 'wd' counts the
 *  interleaved samples of a row. Filtering is done in place, top to bottom.
 *  Each sample is compared with the sample above it (pu1_src_top for the
 *  first row when the top is available) and with the not-yet-filtered sample
 *  below it; the two signs select an entry of gi4_ihevc_table_edge_idx and,
 *  when that entry is non-zero, the matching U or V offset is added and the
 *  result clipped to [0, (1 << BIT_DEPTH_CHROMA) - 1].
 *  When pu1_avail[2] is 0 the first row is not filtered (it only serves as
 *  the "above" row of the second row); when pu1_avail[3] is 0 the last row is
 *  not filtered.
 *  pu1_src_top_left and pu1_src_left are refreshed before filtering;
 *  pu1_src_top is refreshed with the unfiltered bottom row only after
 *  filtering, because its old contents are needed as the top neighbours.
 *
 * @param[in,out] pu1_src           Top-left sample of the interleaved block
 * @param[in]     src_strd          Distance between rows of pu1_src
 * @param[out]    pu1_src_left      2 * ht entries: unfiltered last two columns
 * @param[in,out] pu1_src_top       wd entries: top neighbours in, unfiltered
 *                                  bottom row out
 * @param[out]    pu1_src_top_left  Two entries: previous last two top entries
 * @param[in]     pu1_src_top_right Unused
 * @param[in]     pu1_src_bot_left  Unused
 * @param[in]     pu1_avail         Availability flags; [2] and [3] are read
 * @param[in]     pi1_sao_offset_u  U offsets, indexed by mapped edge index
 * @param[in]     pi1_sao_offset_v  V offsets, indexed by mapped edge index
 * @param[in]     wd                Row width in interleaved samples
 * @param[in]     ht                Block height in rows
 */
void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    const WORD32 i4_max_val = (1 << BIT_DEPTH_CHROMA) - 1;
    /* One mask entry per U/V pair */
    UWORD8 au1_pair_mask[MAX_CTB_SIZE];
    UWORD8 au1_bottom_row[MAX_CTB_SIZE];
    WORD8 ai1_sign_above[MAX_CTB_SIZE];
    /* Index 0 is the U plane (even columns), index 1 the V plane (odd columns) */
    WORD8 *api1_offset[2];
    UWORD8 *pu1_first_row;
    UWORD8 *pu1_above_row;
    WORD32 i4_num_rows;
    WORD32 x, y;

    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);

    api1_offset[0] = pi1_sao_offset_u;
    api1_offset[1] = pi1_sao_offset_v;

    /* No pair is ever disabled in this function, so the mask stays all ones */
    memset(au1_pair_mask, 0xFF, MAX_CTB_SIZE);

    /* Capture the unfiltered neighbours for later blocks */
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
    for(y = 0; y < ht; y++)
    {
        pu1_src_left[2 * y] = pu1_src[y * src_strd + wd - 2];
        pu1_src_left[2 * y + 1] = pu1_src[y * src_strd + wd - 1];
    }
    for(x = 0; x < wd; x++)
    {
        au1_bottom_row[x] = pu1_src[(ht - 1) * src_strd + x];
    }

    /* Pick the first row to filter and the row that sits above it */
    i4_num_rows = ht;
    if(0 == pu1_avail[2])
    {
        /* Top unavailable: skip row 0 and use it as the "above" row */
        pu1_above_row = pu1_src;
        pu1_first_row = pu1_src + src_strd;
        i4_num_rows--;
    }
    else
    {
        pu1_above_row = pu1_src_top;
        pu1_first_row = pu1_src;
    }
    for(x = 0; x < wd; x++)
    {
        ai1_sign_above[x] = SIGN(pu1_first_row[x] - pu1_above_row[x]);
    }

    /* Bottom unavailable: leave the last row untouched */
    if(0 == pu1_avail[3])
    {
        i4_num_rows--;
    }

    /* Filter in place, one row at a time */
    for(y = 0; y < i4_num_rows; y++)
    {
        UWORD8 *pu1_row = pu1_first_row + y * src_strd;

        for(x = 0; x < wd; x++)
        {
            const WORD32 i4_plane = x & 1;
            const WORD32 i4_sign_below = SIGN(pu1_row[x] - pu1_row[x + src_strd]);
            const WORD32 i4_category = 2 + ai1_sign_above[x] + i4_sign_below;
            const WORD32 i4_edge_idx = gi4_ihevc_table_edge_idx[i4_category] & au1_pair_mask[x >> 1];

            /* Sign seen from the row below is the negation of this one
             * (based on the unfiltered current sample) */
            ai1_sign_above[x] = -i4_sign_below;

            if(0 != i4_edge_idx)
            {
                pu1_row[x] = CLIP3(pu1_row[x] + api1_offset[i4_plane][i4_edge_idx], 0, i4_max_val);
            }
        }
    }

    /* Old top neighbours are no longer needed: publish the saved bottom row */
    for(x = 0; x < wd; x++)
    {
        pu1_src_top[x] = au1_bottom_row[x];
    }
}
553*c83a76b0SSuyog Pawar
554*c83a76b0SSuyog Pawar
555*c83a76b0SSuyog Pawar
556*c83a76b0SSuyog Pawar /* 135 degree filtering */
ihevc_sao_edge_offset_class2(UWORD8 * pu1_src,WORD32 src_strd,UWORD8 * pu1_src_left,UWORD8 * pu1_src_top,UWORD8 * pu1_src_top_left,UWORD8 * pu1_src_top_right,UWORD8 * pu1_src_bot_left,UWORD8 * pu1_avail,WORD8 * pi1_sao_offset,WORD32 wd,WORD32 ht)557*c83a76b0SSuyog Pawar void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
558*c83a76b0SSuyog Pawar WORD32 src_strd,
559*c83a76b0SSuyog Pawar UWORD8 *pu1_src_left,
560*c83a76b0SSuyog Pawar UWORD8 *pu1_src_top,
561*c83a76b0SSuyog Pawar UWORD8 *pu1_src_top_left,
562*c83a76b0SSuyog Pawar UWORD8 *pu1_src_top_right,
563*c83a76b0SSuyog Pawar UWORD8 *pu1_src_bot_left,
564*c83a76b0SSuyog Pawar UWORD8 *pu1_avail,
565*c83a76b0SSuyog Pawar WORD8 *pi1_sao_offset,
566*c83a76b0SSuyog Pawar WORD32 wd,
567*c83a76b0SSuyog Pawar WORD32 ht)
568*c83a76b0SSuyog Pawar {
569*c83a76b0SSuyog Pawar WORD32 row, col;
570*c83a76b0SSuyog Pawar UWORD8 au1_mask[MAX_CTB_SIZE];
571*c83a76b0SSuyog Pawar UWORD8 au1_src_left_tmp[MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
572*c83a76b0SSuyog Pawar UWORD8 u1_src_top_left_tmp;
573*c83a76b0SSuyog Pawar WORD8 au1_sign_up[MAX_CTB_SIZE + 1], au1_sign_up_tmp[MAX_CTB_SIZE + 1];
574*c83a76b0SSuyog Pawar WORD8 u1_sign_down;
575*c83a76b0SSuyog Pawar WORD8 *pu1_sign_up;
576*c83a76b0SSuyog Pawar WORD8 *pu1_sign_up_tmp;
577*c83a76b0SSuyog Pawar UWORD8 *pu1_src_left_cpy;
578*c83a76b0SSuyog Pawar
579*c83a76b0SSuyog Pawar WORD32 bit_depth;
580*c83a76b0SSuyog Pawar UWORD8 u1_pos_0_0_tmp;
581*c83a76b0SSuyog Pawar UWORD8 u1_pos_wd_ht_tmp;
582*c83a76b0SSuyog Pawar UNUSED(pu1_src_top_right);
583*c83a76b0SSuyog Pawar UNUSED(pu1_src_bot_left);
584*c83a76b0SSuyog Pawar
585*c83a76b0SSuyog Pawar bit_depth = BIT_DEPTH_LUMA;
586*c83a76b0SSuyog Pawar pu1_sign_up = au1_sign_up;
587*c83a76b0SSuyog Pawar pu1_sign_up_tmp = au1_sign_up_tmp;
588*c83a76b0SSuyog Pawar pu1_src_left_cpy = pu1_src_left;
589*c83a76b0SSuyog Pawar
590*c83a76b0SSuyog Pawar /* Initialize the mask values */
591*c83a76b0SSuyog Pawar memset(au1_mask, 0xFF, MAX_CTB_SIZE);
592*c83a76b0SSuyog Pawar
593*c83a76b0SSuyog Pawar /* Update left, top and top-left arrays */
594*c83a76b0SSuyog Pawar u1_src_top_left_tmp = pu1_src_top[wd - 1];
595*c83a76b0SSuyog Pawar for(row = 0; row < ht; row++)
596*c83a76b0SSuyog Pawar {
597*c83a76b0SSuyog Pawar au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
598*c83a76b0SSuyog Pawar }
599*c83a76b0SSuyog Pawar for(col = 0; col < wd; col++)
600*c83a76b0SSuyog Pawar {
601*c83a76b0SSuyog Pawar au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
602*c83a76b0SSuyog Pawar }
603*c83a76b0SSuyog Pawar
604*c83a76b0SSuyog Pawar
605*c83a76b0SSuyog Pawar /* If top-left is available, process separately */
606*c83a76b0SSuyog Pawar if(0 != pu1_avail[4])
607*c83a76b0SSuyog Pawar {
608*c83a76b0SSuyog Pawar WORD32 edge_idx;
609*c83a76b0SSuyog Pawar
610*c83a76b0SSuyog Pawar edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
611*c83a76b0SSuyog Pawar SIGN(pu1_src[0] - pu1_src[1 + src_strd]);
612*c83a76b0SSuyog Pawar
613*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
614*c83a76b0SSuyog Pawar
615*c83a76b0SSuyog Pawar if(0 != edge_idx)
616*c83a76b0SSuyog Pawar {
617*c83a76b0SSuyog Pawar u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
618*c83a76b0SSuyog Pawar }
619*c83a76b0SSuyog Pawar else
620*c83a76b0SSuyog Pawar {
621*c83a76b0SSuyog Pawar u1_pos_0_0_tmp = pu1_src[0];
622*c83a76b0SSuyog Pawar }
623*c83a76b0SSuyog Pawar }
624*c83a76b0SSuyog Pawar else
625*c83a76b0SSuyog Pawar {
626*c83a76b0SSuyog Pawar u1_pos_0_0_tmp = pu1_src[0];
627*c83a76b0SSuyog Pawar }
628*c83a76b0SSuyog Pawar
629*c83a76b0SSuyog Pawar /* If bottom-right is available, process separately */
630*c83a76b0SSuyog Pawar if(0 != pu1_avail[7])
631*c83a76b0SSuyog Pawar {
632*c83a76b0SSuyog Pawar WORD32 edge_idx;
633*c83a76b0SSuyog Pawar
634*c83a76b0SSuyog Pawar edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 1 - src_strd]) +
635*c83a76b0SSuyog Pawar SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 1 + src_strd]);
636*c83a76b0SSuyog Pawar
637*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
638*c83a76b0SSuyog Pawar
639*c83a76b0SSuyog Pawar if(0 != edge_idx)
640*c83a76b0SSuyog Pawar {
641*c83a76b0SSuyog Pawar u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
642*c83a76b0SSuyog Pawar }
643*c83a76b0SSuyog Pawar else
644*c83a76b0SSuyog Pawar {
645*c83a76b0SSuyog Pawar u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
646*c83a76b0SSuyog Pawar }
647*c83a76b0SSuyog Pawar }
648*c83a76b0SSuyog Pawar else
649*c83a76b0SSuyog Pawar {
650*c83a76b0SSuyog Pawar u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
651*c83a76b0SSuyog Pawar }
652*c83a76b0SSuyog Pawar
653*c83a76b0SSuyog Pawar /* If Left is not available */
654*c83a76b0SSuyog Pawar if(0 == pu1_avail[0])
655*c83a76b0SSuyog Pawar {
656*c83a76b0SSuyog Pawar au1_mask[0] = 0;
657*c83a76b0SSuyog Pawar }
658*c83a76b0SSuyog Pawar
659*c83a76b0SSuyog Pawar /* If Top is not available */
660*c83a76b0SSuyog Pawar if(0 == pu1_avail[2])
661*c83a76b0SSuyog Pawar {
662*c83a76b0SSuyog Pawar pu1_src += src_strd;
663*c83a76b0SSuyog Pawar ht--;
664*c83a76b0SSuyog Pawar pu1_src_left_cpy += 1;
665*c83a76b0SSuyog Pawar for(col = 1; col < wd; col++)
666*c83a76b0SSuyog Pawar {
667*c83a76b0SSuyog Pawar pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 1 - src_strd]);
668*c83a76b0SSuyog Pawar }
669*c83a76b0SSuyog Pawar }
670*c83a76b0SSuyog Pawar else
671*c83a76b0SSuyog Pawar {
672*c83a76b0SSuyog Pawar for(col = 1; col < wd; col++)
673*c83a76b0SSuyog Pawar {
674*c83a76b0SSuyog Pawar pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 1]);
675*c83a76b0SSuyog Pawar }
676*c83a76b0SSuyog Pawar }
677*c83a76b0SSuyog Pawar
678*c83a76b0SSuyog Pawar /* If Right is not available */
679*c83a76b0SSuyog Pawar if(0 == pu1_avail[1])
680*c83a76b0SSuyog Pawar {
681*c83a76b0SSuyog Pawar au1_mask[wd - 1] = 0;
682*c83a76b0SSuyog Pawar }
683*c83a76b0SSuyog Pawar
684*c83a76b0SSuyog Pawar /* If Bottom is not available */
685*c83a76b0SSuyog Pawar if(0 == pu1_avail[3])
686*c83a76b0SSuyog Pawar {
687*c83a76b0SSuyog Pawar ht--;
688*c83a76b0SSuyog Pawar }
689*c83a76b0SSuyog Pawar
690*c83a76b0SSuyog Pawar /* Processing is done on the intermediate buffer and the output is written to the source buffer */
691*c83a76b0SSuyog Pawar {
692*c83a76b0SSuyog Pawar for(row = 0; row < ht; row++)
693*c83a76b0SSuyog Pawar {
694*c83a76b0SSuyog Pawar pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[row - 1]);
695*c83a76b0SSuyog Pawar for(col = 0; col < wd; col++)
696*c83a76b0SSuyog Pawar {
697*c83a76b0SSuyog Pawar WORD32 edge_idx;
698*c83a76b0SSuyog Pawar
699*c83a76b0SSuyog Pawar u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 1 + src_strd]);
700*c83a76b0SSuyog Pawar edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
701*c83a76b0SSuyog Pawar pu1_sign_up_tmp[col + 1] = -u1_sign_down;
702*c83a76b0SSuyog Pawar
703*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
704*c83a76b0SSuyog Pawar
705*c83a76b0SSuyog Pawar if(0 != edge_idx)
706*c83a76b0SSuyog Pawar {
707*c83a76b0SSuyog Pawar pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
708*c83a76b0SSuyog Pawar }
709*c83a76b0SSuyog Pawar }
710*c83a76b0SSuyog Pawar
711*c83a76b0SSuyog Pawar /* Swapping pu1_sign_up_tmp and pu1_sign_up */
712*c83a76b0SSuyog Pawar {
713*c83a76b0SSuyog Pawar WORD8 *pu1_swap_tmp = pu1_sign_up;
714*c83a76b0SSuyog Pawar pu1_sign_up = pu1_sign_up_tmp;
715*c83a76b0SSuyog Pawar pu1_sign_up_tmp = pu1_swap_tmp;
716*c83a76b0SSuyog Pawar }
717*c83a76b0SSuyog Pawar
718*c83a76b0SSuyog Pawar pu1_src += src_strd;
719*c83a76b0SSuyog Pawar }
720*c83a76b0SSuyog Pawar
721*c83a76b0SSuyog Pawar pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp;
722*c83a76b0SSuyog Pawar pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp;
723*c83a76b0SSuyog Pawar }
724*c83a76b0SSuyog Pawar
725*c83a76b0SSuyog Pawar if(0 == pu1_avail[2])
726*c83a76b0SSuyog Pawar ht++;
727*c83a76b0SSuyog Pawar if(0 == pu1_avail[3])
728*c83a76b0SSuyog Pawar ht++;
729*c83a76b0SSuyog Pawar *pu1_src_top_left = u1_src_top_left_tmp;
730*c83a76b0SSuyog Pawar for(row = 0; row < ht; row++)
731*c83a76b0SSuyog Pawar {
732*c83a76b0SSuyog Pawar pu1_src_left[row] = au1_src_left_tmp[row];
733*c83a76b0SSuyog Pawar }
734*c83a76b0SSuyog Pawar for(col = 0; col < wd; col++)
735*c83a76b0SSuyog Pawar {
736*c83a76b0SSuyog Pawar pu1_src_top[col] = au1_src_top_tmp[col];
737*c83a76b0SSuyog Pawar }
738*c83a76b0SSuyog Pawar
739*c83a76b0SSuyog Pawar }
740*c83a76b0SSuyog Pawar
741*c83a76b0SSuyog Pawar
742*c83a76b0SSuyog Pawar
743*c83a76b0SSuyog Pawar
744*c83a76b0SSuyog Pawar /* 135 degree filtering */
/**
*******************************************************************************
*
* @brief
*  Sample adaptive offset, edge offset class 2 (135 degree diagonal), for a
*  block whose rows hold alternating U and V bytes
*
* @par Description:
*  Every sample is compared with its up-left and down-right neighbours, which
*  are taken 2 bytes apart horizontally so that U is compared with U and V
*  with V. The two comparison signs select an entry of
*  gi4_ihevc_table_edge_idx; a non-zero entry indexes the offset table
*  (pi1_sao_offset_u for even columns, pi1_sao_offset_v for odd columns) and
*  the offset sample is clipped to [0, (1 << BIT_DEPTH_CHROMA) - 1].
*  The block is filtered in place. The top-left and bottom-right U/V pairs are
*  computed up front from unfiltered data and written back after the main
*  loop.
*
* @param[in,out] pu1_src
*  Top-left sample of the block; filtered in place
*
* @param[in] src_strd
*  Distance in bytes between two rows of pu1_src
*
* @param[in,out] pu1_src_left
*  2 * ht bytes. Read as the up-left neighbours of columns 0 and 1. On return
*  it holds the unfiltered last two columns of this block
*
* @param[in,out] pu1_src_top
*  wd bytes. Read as the row above the block when top is available. On return
*  it holds the unfiltered last row of this block
*
* @param[in,out] pu1_src_top_left
*  2 bytes. Up-left neighbour of the first U/V pair. On return it holds the
*  incoming pu1_src_top[wd - 2] and pu1_src_top[wd - 1]
*
* @param[in] pu1_src_top_right
*  Unused
*
* @param[in] pu1_src_bot_left
*  Unused
*
* @param[in] pu1_avail
*  Neighbour availability flags. Entries read here: [0] left, [1] right,
*  [2] top, [3] bottom, [4] top-left, [7] bottom-right
*
* @param[in] pi1_sao_offset_u
*  Offset table applied to even columns, indexed by the edge index
*
* @param[in] pi1_sao_offset_v
*  Offset table applied to odd columns, indexed by the edge index
*
* @param[in] wd
*  Block width in bytes (U and V together); the local scratch arrays are
*  sized for wd <= MAX_CTB_SIZE
*
* @param[in] ht
*  Block height in rows; the local scratch arrays are sized for
*  ht <= MAX_CTB_SIZE
*
* @returns
*  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
                                         WORD32 src_strd,
                                         UWORD8 *pu1_src_left,
                                         UWORD8 *pu1_src_top,
                                         UWORD8 *pu1_src_top_left,
                                         UWORD8 *pu1_src_top_right,
                                         UWORD8 *pu1_src_bot_left,
                                         UWORD8 *pu1_avail,
                                         WORD8 *pi1_sao_offset_u,
                                         WORD8 *pi1_sao_offset_v,
                                         WORD32 wd,
                                         WORD32 ht)
{
    WORD32 row, col;
    UWORD8 au1_mask[MAX_CTB_SIZE];
    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
    UWORD8 au1_src_top_left_tmp[2];
    WORD8 au1_sign_up[MAX_CTB_SIZE + 2], au1_sign_up_tmp[MAX_CTB_SIZE + 2];
    WORD8 u1_sign_down;
    WORD8 *pu1_sign_up;
    WORD8 *pu1_sign_up_tmp;
    UWORD8 *pu1_src_left_cpy;

    WORD32 bit_depth;

    UWORD8 u1_pos_0_0_tmp_u;
    UWORD8 u1_pos_0_0_tmp_v;
    UWORD8 u1_pos_wd_ht_tmp_u;
    UWORD8 u1_pos_wd_ht_tmp_v;
    UNUSED(pu1_src_top_right);
    UNUSED(pu1_src_bot_left);


    bit_depth = BIT_DEPTH_CHROMA;
    pu1_sign_up = au1_sign_up;
    pu1_sign_up_tmp = au1_sign_up_tmp;
    pu1_src_left_cpy = pu1_src_left;

    /* One mask byte per U/V pair: 0xFF keeps the edge index, 0 forces it to 0 */
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);

    /* Snapshot the unfiltered last columns / last row / top-right pair of   */
    /* pu1_src_top; they are written to the left, top and top-left arrays at */
    /* the very end, after the neighbour arrays have been consumed           */
    au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
    au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
    for(row = 0; row < ht; row++)
    {
        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
    }
    for(col = 0; col < wd; col++)
    {
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    }


    /* If top-left is available, process separately */
    if(0 != pu1_avail[4])
    {
        WORD32 edge_idx;

        /* U */
        edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
                        SIGN(pu1_src[0] - pu1_src[2 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
        }
        else
        {
            u1_pos_0_0_tmp_u = pu1_src[0];
        }

        /* V */
        edge_idx = 2 + SIGN(pu1_src[1] - pu1_src_top_left[1]) +
                        SIGN(pu1_src[1] - pu1_src[1 + 2 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_0_0_tmp_v = CLIP3(pu1_src[1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
        }
        else
        {
            u1_pos_0_0_tmp_v = pu1_src[1];
        }
    }
    else
    {
        u1_pos_0_0_tmp_u = pu1_src[0];
        u1_pos_0_0_tmp_v = pu1_src[1];
    }

    /* If bottom-right is available, process separately */
    if(0 != pu1_avail[7])
    {
        WORD32 edge_idx;

        /* U */
        edge_idx = 2 + SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd]) +
                        SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_wd_ht_tmp_u = CLIP3(pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
        }
        else
        {
            u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
        }

        /* V */
        edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) +
                        SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
        }
        else
        {
            u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
        }
    }
    else
    {
        u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
        u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
    }

    /* If Left is not available, the first U/V pair of every row is left untouched */
    if(0 == pu1_avail[0])
    {
        au1_mask[0] = 0;
    }

    /* If Top is not available, skip the first row; the "up" signs of the   */
    /* next row then come from that skipped row inside pu1_src itself       */
    if(0 == pu1_avail[2])
    {
        pu1_src += src_strd;
        pu1_src_left_cpy += 2;
        ht--;
        for(col = 2; col < wd; col++)
        {
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 2 - src_strd]);
        }
    }
    else
    {
        for(col = 2; col < wd; col++)
        {
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 2]);
        }
    }

    /* If Right is not available, the last U/V pair of every row is left untouched */
    if(0 == pu1_avail[1])
    {
        au1_mask[(wd - 1) >> 1] = 0;
    }

    /* If Bottom is not available, skip the last row */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    /* The block is filtered in place, one row at a time */
    {
        for(row = 0; row < ht; row++)
        {
            if((0 == row) && (0 != pu1_avail[2]))
            {
                /* First row of the block: the up-left neighbour of the first */
                /* U/V pair is the top-left pair. Indexing the left array     */
                /* with 2 * (row - 1) here would read pu1_src_left[-2] and    */
                /* pu1_src_left[-1]. The samples produced from these signs    */
                /* are replaced by u1_pos_0_0_tmp_u/v after the loop          */
                pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_top_left[0]);
                pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_top_left[1]);
            }
            else
            {
                pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[2 * (row - 1)]);
                pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_left_cpy[2 * (row - 1) + 1]);
            }

            for(col = 0; col < wd; col++)
            {
                WORD32 edge_idx;
                WORD8 *pi1_sao_offset;

                /* Even columns are U, odd columns are V */
                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;

                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 2 + src_strd]);
                edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
                /* The negated "down" sign is the "up" sign of the down-right */
                /* sample, which sits 2 columns further in the next row       */
                pu1_sign_up_tmp[col + 2] = -u1_sign_down;

                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];

                if(0 != edge_idx)
                {
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
                }
            }

            /* Swapping pu1_sign_up_tmp and pu1_sign_up */
            {
                WORD8 *pu1_swap_tmp = pu1_sign_up;
                pu1_sign_up = pu1_sign_up_tmp;
                pu1_sign_up_tmp = pu1_swap_tmp;
            }

            pu1_src += src_strd;
        }

        /* Write back the separately computed corner pairs. pu1_src now      */
        /* points one row past the last processed row                        */
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp_u;
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + 1] = u1_pos_0_0_tmp_v;
        pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = u1_pos_wd_ht_tmp_u;
        pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp_v;
    }

    /* Restore the full block height before publishing the neighbour arrays */
    if(0 == pu1_avail[2])
        ht++;
    if(0 == pu1_avail[3])
        ht++;
    pu1_src_top_left[0] = au1_src_top_left_tmp[0];
    pu1_src_top_left[1] = au1_src_top_left_tmp[1];
    for(row = 0; row < 2 * ht; row++)
    {
        pu1_src_left[row] = au1_src_left_tmp[row];
    }
    for(col = 0; col < wd; col++)
    {
        pu1_src_top[col] = au1_src_top_tmp[col];
    }

}
976*c83a76b0SSuyog Pawar
977*c83a76b0SSuyog Pawar
978*c83a76b0SSuyog Pawar
979*c83a76b0SSuyog Pawar
980*c83a76b0SSuyog Pawar /* 45 degree filtering */
/**
*******************************************************************************
*
* @brief
*  Sample adaptive offset, edge offset class 3 (45 degree diagonal), luma
*
* @par Description:
*  Every sample is compared with its up-right and down-left neighbours. The
*  two comparison signs select an entry of gi4_ihevc_table_edge_idx; a
*  non-zero entry indexes pi1_sao_offset and the offset sample is clipped to
*  [0, (1 << BIT_DEPTH_LUMA) - 1]. The block is filtered in place. The
*  top-right and bottom-left samples are computed up front from unfiltered
*  data and written back after the main loop.
*
* @param[in,out] pu1_src
*  Top-left sample of the block; filtered in place
*
* @param[in] src_strd
*  Distance in bytes between two rows of pu1_src
*
* @param[in,out] pu1_src_left
*  ht bytes. Read as the down-left neighbours of column 0. On return it holds
*  the unfiltered last column of this block
*
* @param[in,out] pu1_src_top
*  wd bytes. Read as the row above the block when top is available. On return
*  it holds the unfiltered last row of this block
*
* @param[out] pu1_src_top_left
*  1 byte. On return it holds the incoming pu1_src_top[wd - 1]
*
* @param[in] pu1_src_top_right
*  Up-right neighbour of the top-right sample; read only when pu1_avail[5]
*  is non-zero
*
* @param[in] pu1_src_bot_left
*  Down-left neighbour of the bottom-left sample; read only when pu1_avail[6]
*  is non-zero
*
* @param[in] pu1_avail
*  Neighbour availability flags. Entries read here: [0] left, [1] right,
*  [2] top, [3] bottom, [5] top-right, [6] bottom-left
*
* @param[in] pi1_sao_offset
*  Offset table indexed by the edge index
*
* @param[in] wd
*  Block width in samples; the local scratch arrays are sized for
*  wd <= MAX_CTB_SIZE
*
* @param[in] ht
*  Block height in rows; the local scratch arrays are sized for
*  ht <= MAX_CTB_SIZE
*
* @returns
*  None
*
*******************************************************************************
*/
void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  UWORD8 *pu1_src_left,
                                  UWORD8 *pu1_src_top,
                                  UWORD8 *pu1_src_top_left,
                                  UWORD8 *pu1_src_top_right,
                                  UWORD8 *pu1_src_bot_left,
                                  UWORD8 *pu1_avail,
                                  WORD8 *pi1_sao_offset,
                                  WORD32 wd,
                                  WORD32 ht)
{
    WORD32 row, col;
    UWORD8 au1_mask[MAX_CTB_SIZE];
    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
    UWORD8 u1_src_top_left_tmp;
    WORD8 au1_sign_up[MAX_CTB_SIZE];
    UWORD8 *pu1_src_left_cpy;
    WORD8 u1_sign_down;
    WORD32 bit_depth;

    UWORD8 u1_pos_0_ht_tmp;
    UWORD8 u1_pos_wd_0_tmp;

    bit_depth = BIT_DEPTH_LUMA;
    pu1_src_left_cpy = pu1_src_left;

    /* One mask byte per column: 0xFF keeps the edge index, 0 forces it to 0 */
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);

    /* Snapshot the unfiltered last column / last row / last entry of        */
    /* pu1_src_top; they are written to the left, top and top-left arrays at */
    /* the very end, after the neighbour arrays have been consumed           */
    u1_src_top_left_tmp = pu1_src_top[wd - 1];
    for(row = 0; row < ht; row++)
    {
        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
    }
    for(col = 0; col < wd; col++)
    {
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    }

    /* If top-right is available, process separately */
    if(0 != pu1_avail[5])
    {
        WORD32 edge_idx;

        edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) +
                        SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_wd_0_tmp = CLIP3(pu1_src[wd - 1] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
        }
        else
        {
            u1_pos_wd_0_tmp = pu1_src[wd - 1];
        }
    }
    else
    {
        u1_pos_wd_0_tmp = pu1_src[wd - 1];
    }

    /* If bottom-left is available, process separately */
    if(0 != pu1_avail[6])
    {
        WORD32 edge_idx;

        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd]) +
                        SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);

        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];

        if(0 != edge_idx)
        {
            u1_pos_0_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
        }
        else
        {
            u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
        }
    }
    else
    {
        u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
    }

    /* If Left is not available, the first column is left untouched */
    if(0 == pu1_avail[0])
    {
        au1_mask[0] = 0;
    }

    /* If Top is not available, skip the first row; the "up" signs of the   */
    /* next row then come from that skipped row inside pu1_src itself       */
    if(0 == pu1_avail[2])
    {
        pu1_src += src_strd;
        ht--;
        pu1_src_left_cpy += 1;
        for(col = 0; col < wd - 1; col++)
        {
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 1 - src_strd]);
        }
    }
    else
    {
        for(col = 0; col < wd - 1; col++)
        {
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 1]);
        }
    }

    /* If Right is not available, the last column is left untouched */
    if(0 == pu1_avail[1])
    {
        au1_mask[wd - 1] = 0;
    }

    /* If Bottom is not available, skip the last row */
    if(0 == pu1_avail[3])
    {
        ht--;
    }

    /* The block is filtered in place, one row at a time */
    {
        for(row = 0; row < ht; row++)
        {
            /* When bottom is available, the last processed row is the last  */
            /* row of the block and its column 0 is the bottom-left sample.  */
            /* That sample is replaced by u1_pos_0_ht_tmp after the loop and */
            /* its "down" sign is not propagated, so skip it here: filtering */
            /* it would index pu1_src_left_cpy one entry past the ht entries */
            /* of pu1_src_left that this function saves and restores         */
            WORD32 col_start = ((row == ht - 1) && (0 != pu1_avail[3])) ? 1 : 0;

            /* Up-right neighbour of the last column lies outside the block */
            au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 1 - src_strd]);
            for(col = col_start; col < wd; col++)
            {
                WORD32 edge_idx;

                /* Down-left neighbour of column 0 comes from the left array */
                u1_sign_down = SIGN(pu1_src[col] - ((col == 0) ? pu1_src_left_cpy[row + 1] :
                                                                 pu1_src[col - 1 + src_strd]));
                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
                /* The negated "down" sign is the "up" sign of the down-left */
                /* sample, which sits 1 column back in the next row. Entry   */
                /* col - 1 has already been consumed for this row            */
                if(col > 0)
                    au1_sign_up[col - 1] = -u1_sign_down;

                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];

                if(0 != edge_idx)
                {
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
                }
            }

            pu1_src += src_strd;
        }

        /* Write back the separately computed corner samples. pu1_src now   */
        /* points one row past the last processed row                       */
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp;
        pu1_src[(pu1_avail[3] ? (-src_strd) : 0)] = u1_pos_0_ht_tmp;
    }

    /* Restore the full block height before publishing the neighbour arrays */
    if(0 == pu1_avail[2])
        ht++;
    if(0 == pu1_avail[3])
        ht++;
    *pu1_src_top_left = u1_src_top_left_tmp;
    for(row = 0; row < ht; row++)
    {
        pu1_src_left[row] = au1_src_left_tmp[row];
    }
    for(col = 0; col < wd; col++)
    {
        pu1_src_top[col] = au1_src_top_tmp[col];
    }

}
1153*c83a76b0SSuyog Pawar
1154*c83a76b0SSuyog Pawar
1155*c83a76b0SSuyog Pawar
1156*c83a76b0SSuyog Pawar
ihevc_sao_edge_offset_class3_chroma(UWORD8 * pu1_src,WORD32 src_strd,UWORD8 * pu1_src_left,UWORD8 * pu1_src_top,UWORD8 * pu1_src_top_left,UWORD8 * pu1_src_top_right,UWORD8 * pu1_src_bot_left,UWORD8 * pu1_avail,WORD8 * pi1_sao_offset_u,WORD8 * pi1_sao_offset_v,WORD32 wd,WORD32 ht)1157*c83a76b0SSuyog Pawar void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
1158*c83a76b0SSuyog Pawar WORD32 src_strd,
1159*c83a76b0SSuyog Pawar UWORD8 *pu1_src_left,
1160*c83a76b0SSuyog Pawar UWORD8 *pu1_src_top,
1161*c83a76b0SSuyog Pawar UWORD8 *pu1_src_top_left,
1162*c83a76b0SSuyog Pawar UWORD8 *pu1_src_top_right,
1163*c83a76b0SSuyog Pawar UWORD8 *pu1_src_bot_left,
1164*c83a76b0SSuyog Pawar UWORD8 *pu1_avail,
1165*c83a76b0SSuyog Pawar WORD8 *pi1_sao_offset_u,
1166*c83a76b0SSuyog Pawar WORD8 *pi1_sao_offset_v,
1167*c83a76b0SSuyog Pawar WORD32 wd,
1168*c83a76b0SSuyog Pawar WORD32 ht)
1169*c83a76b0SSuyog Pawar {
1170*c83a76b0SSuyog Pawar WORD32 row, col;
1171*c83a76b0SSuyog Pawar UWORD8 au1_mask[MAX_CTB_SIZE];
1172*c83a76b0SSuyog Pawar UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
1173*c83a76b0SSuyog Pawar UWORD8 au1_src_top_left_tmp[2];
1174*c83a76b0SSuyog Pawar WORD8 au1_sign_up[MAX_CTB_SIZE];
1175*c83a76b0SSuyog Pawar UWORD8 *pu1_src_left_cpy;
1176*c83a76b0SSuyog Pawar WORD8 u1_sign_down;
1177*c83a76b0SSuyog Pawar WORD32 bit_depth;
1178*c83a76b0SSuyog Pawar
1179*c83a76b0SSuyog Pawar UWORD8 u1_pos_wd_0_tmp_u;
1180*c83a76b0SSuyog Pawar UWORD8 u1_pos_wd_0_tmp_v;
1181*c83a76b0SSuyog Pawar UWORD8 u1_pos_0_ht_tmp_u;
1182*c83a76b0SSuyog Pawar UWORD8 u1_pos_0_ht_tmp_v;
1183*c83a76b0SSuyog Pawar
1184*c83a76b0SSuyog Pawar bit_depth = BIT_DEPTH_CHROMA;
1185*c83a76b0SSuyog Pawar pu1_src_left_cpy = pu1_src_left;
1186*c83a76b0SSuyog Pawar
1187*c83a76b0SSuyog Pawar /* Initialize the mask values */
1188*c83a76b0SSuyog Pawar memset(au1_mask, 0xFF, MAX_CTB_SIZE);
1189*c83a76b0SSuyog Pawar
1190*c83a76b0SSuyog Pawar /* Update left, top and top-left arrays */
1191*c83a76b0SSuyog Pawar au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
1192*c83a76b0SSuyog Pawar au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
1193*c83a76b0SSuyog Pawar for(row = 0; row < ht; row++)
1194*c83a76b0SSuyog Pawar {
1195*c83a76b0SSuyog Pawar au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
1196*c83a76b0SSuyog Pawar au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
1197*c83a76b0SSuyog Pawar }
1198*c83a76b0SSuyog Pawar for(col = 0; col < wd; col++)
1199*c83a76b0SSuyog Pawar {
1200*c83a76b0SSuyog Pawar au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
1201*c83a76b0SSuyog Pawar }
1202*c83a76b0SSuyog Pawar
1203*c83a76b0SSuyog Pawar
1204*c83a76b0SSuyog Pawar /* If top-right is available, process separately */
1205*c83a76b0SSuyog Pawar if(0 != pu1_avail[5])
1206*c83a76b0SSuyog Pawar {
1207*c83a76b0SSuyog Pawar WORD32 edge_idx;
1208*c83a76b0SSuyog Pawar
1209*c83a76b0SSuyog Pawar /* U */
1210*c83a76b0SSuyog Pawar edge_idx = 2 + SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) +
1211*c83a76b0SSuyog Pawar SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]);
1212*c83a76b0SSuyog Pawar
1213*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1214*c83a76b0SSuyog Pawar
1215*c83a76b0SSuyog Pawar if(0 != edge_idx)
1216*c83a76b0SSuyog Pawar {
1217*c83a76b0SSuyog Pawar u1_pos_wd_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
1218*c83a76b0SSuyog Pawar }
1219*c83a76b0SSuyog Pawar else
1220*c83a76b0SSuyog Pawar {
1221*c83a76b0SSuyog Pawar u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
1222*c83a76b0SSuyog Pawar }
1223*c83a76b0SSuyog Pawar
1224*c83a76b0SSuyog Pawar /* V */
1225*c83a76b0SSuyog Pawar edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) +
1226*c83a76b0SSuyog Pawar SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]);
1227*c83a76b0SSuyog Pawar
1228*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1229*c83a76b0SSuyog Pawar
1230*c83a76b0SSuyog Pawar if(0 != edge_idx)
1231*c83a76b0SSuyog Pawar {
1232*c83a76b0SSuyog Pawar u1_pos_wd_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
1233*c83a76b0SSuyog Pawar }
1234*c83a76b0SSuyog Pawar else
1235*c83a76b0SSuyog Pawar {
1236*c83a76b0SSuyog Pawar u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
1237*c83a76b0SSuyog Pawar }
1238*c83a76b0SSuyog Pawar }
1239*c83a76b0SSuyog Pawar else
1240*c83a76b0SSuyog Pawar {
1241*c83a76b0SSuyog Pawar u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
1242*c83a76b0SSuyog Pawar u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
1243*c83a76b0SSuyog Pawar }
1244*c83a76b0SSuyog Pawar
1245*c83a76b0SSuyog Pawar /* If bottom-left is available, process separately */
1246*c83a76b0SSuyog Pawar if(0 != pu1_avail[6])
1247*c83a76b0SSuyog Pawar {
1248*c83a76b0SSuyog Pawar WORD32 edge_idx;
1249*c83a76b0SSuyog Pawar
1250*c83a76b0SSuyog Pawar /* U */
1251*c83a76b0SSuyog Pawar edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) +
1252*c83a76b0SSuyog Pawar SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
1253*c83a76b0SSuyog Pawar
1254*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1255*c83a76b0SSuyog Pawar
1256*c83a76b0SSuyog Pawar if(0 != edge_idx)
1257*c83a76b0SSuyog Pawar {
1258*c83a76b0SSuyog Pawar u1_pos_0_ht_tmp_u = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
1259*c83a76b0SSuyog Pawar }
1260*c83a76b0SSuyog Pawar else
1261*c83a76b0SSuyog Pawar {
1262*c83a76b0SSuyog Pawar u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
1263*c83a76b0SSuyog Pawar }
1264*c83a76b0SSuyog Pawar
1265*c83a76b0SSuyog Pawar /* V */
1266*c83a76b0SSuyog Pawar edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) +
1267*c83a76b0SSuyog Pawar SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]);
1268*c83a76b0SSuyog Pawar
1269*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1270*c83a76b0SSuyog Pawar
1271*c83a76b0SSuyog Pawar if(0 != edge_idx)
1272*c83a76b0SSuyog Pawar {
1273*c83a76b0SSuyog Pawar u1_pos_0_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd + 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
1274*c83a76b0SSuyog Pawar }
1275*c83a76b0SSuyog Pawar else
1276*c83a76b0SSuyog Pawar {
1277*c83a76b0SSuyog Pawar u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
1278*c83a76b0SSuyog Pawar }
1279*c83a76b0SSuyog Pawar }
1280*c83a76b0SSuyog Pawar else
1281*c83a76b0SSuyog Pawar {
1282*c83a76b0SSuyog Pawar u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
1283*c83a76b0SSuyog Pawar u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
1284*c83a76b0SSuyog Pawar }
1285*c83a76b0SSuyog Pawar
1286*c83a76b0SSuyog Pawar /* If Left is not available */
1287*c83a76b0SSuyog Pawar if(0 == pu1_avail[0])
1288*c83a76b0SSuyog Pawar {
1289*c83a76b0SSuyog Pawar au1_mask[0] = 0;
1290*c83a76b0SSuyog Pawar }
1291*c83a76b0SSuyog Pawar
1292*c83a76b0SSuyog Pawar /* If Top is not available */
1293*c83a76b0SSuyog Pawar if(0 == pu1_avail[2])
1294*c83a76b0SSuyog Pawar {
1295*c83a76b0SSuyog Pawar pu1_src += src_strd;
1296*c83a76b0SSuyog Pawar ht--;
1297*c83a76b0SSuyog Pawar pu1_src_left_cpy += 2;
1298*c83a76b0SSuyog Pawar for(col = 0; col < wd - 2; col++)
1299*c83a76b0SSuyog Pawar {
1300*c83a76b0SSuyog Pawar au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 2 - src_strd]);
1301*c83a76b0SSuyog Pawar }
1302*c83a76b0SSuyog Pawar }
1303*c83a76b0SSuyog Pawar else
1304*c83a76b0SSuyog Pawar {
1305*c83a76b0SSuyog Pawar for(col = 0; col < wd - 2; col++)
1306*c83a76b0SSuyog Pawar {
1307*c83a76b0SSuyog Pawar au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 2]);
1308*c83a76b0SSuyog Pawar }
1309*c83a76b0SSuyog Pawar }
1310*c83a76b0SSuyog Pawar
1311*c83a76b0SSuyog Pawar /* If Right is not available */
1312*c83a76b0SSuyog Pawar if(0 == pu1_avail[1])
1313*c83a76b0SSuyog Pawar {
1314*c83a76b0SSuyog Pawar au1_mask[(wd - 1) >> 1] = 0;
1315*c83a76b0SSuyog Pawar }
1316*c83a76b0SSuyog Pawar
1317*c83a76b0SSuyog Pawar /* If Bottom is not available */
1318*c83a76b0SSuyog Pawar if(0 == pu1_avail[3])
1319*c83a76b0SSuyog Pawar {
1320*c83a76b0SSuyog Pawar ht--;
1321*c83a76b0SSuyog Pawar }
1322*c83a76b0SSuyog Pawar
1323*c83a76b0SSuyog Pawar /* Processing is done on the intermediate buffer and the output is written to the source buffer */
1324*c83a76b0SSuyog Pawar {
1325*c83a76b0SSuyog Pawar for(row = 0; row < ht; row++)
1326*c83a76b0SSuyog Pawar {
1327*c83a76b0SSuyog Pawar au1_sign_up[wd - 2] = SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 + 2 - src_strd]);
1328*c83a76b0SSuyog Pawar au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 2 - src_strd]);
1329*c83a76b0SSuyog Pawar for(col = 0; col < wd; col++)
1330*c83a76b0SSuyog Pawar {
1331*c83a76b0SSuyog Pawar WORD32 edge_idx;
1332*c83a76b0SSuyog Pawar WORD8 *pi1_sao_offset;
1333*c83a76b0SSuyog Pawar
1334*c83a76b0SSuyog Pawar pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
1335*c83a76b0SSuyog Pawar
1336*c83a76b0SSuyog Pawar u1_sign_down = SIGN(pu1_src[col] - ((col < 2) ? pu1_src_left_cpy[2 * (row + 1) + col] :
1337*c83a76b0SSuyog Pawar pu1_src[col - 2 + src_strd]));
1338*c83a76b0SSuyog Pawar edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
1339*c83a76b0SSuyog Pawar if(col > 1)
1340*c83a76b0SSuyog Pawar au1_sign_up[col - 2] = -u1_sign_down;
1341*c83a76b0SSuyog Pawar
1342*c83a76b0SSuyog Pawar edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
1343*c83a76b0SSuyog Pawar
1344*c83a76b0SSuyog Pawar if(0 != edge_idx)
1345*c83a76b0SSuyog Pawar {
1346*c83a76b0SSuyog Pawar pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1347*c83a76b0SSuyog Pawar }
1348*c83a76b0SSuyog Pawar }
1349*c83a76b0SSuyog Pawar
1350*c83a76b0SSuyog Pawar pu1_src += src_strd;
1351*c83a76b0SSuyog Pawar }
1352*c83a76b0SSuyog Pawar
1353*c83a76b0SSuyog Pawar pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 2] = u1_pos_wd_0_tmp_u;
1354*c83a76b0SSuyog Pawar pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp_v;
1355*c83a76b0SSuyog Pawar pu1_src[(pu1_avail[3] ? (-src_strd) : 0)] = u1_pos_0_ht_tmp_u;
1356*c83a76b0SSuyog Pawar pu1_src[(pu1_avail[3] ? (-src_strd) : 0) + 1] = u1_pos_0_ht_tmp_v;
1357*c83a76b0SSuyog Pawar }
1358*c83a76b0SSuyog Pawar
1359*c83a76b0SSuyog Pawar if(0 == pu1_avail[2])
1360*c83a76b0SSuyog Pawar ht++;
1361*c83a76b0SSuyog Pawar if(0 == pu1_avail[3])
1362*c83a76b0SSuyog Pawar ht++;
1363*c83a76b0SSuyog Pawar pu1_src_top_left[0] = au1_src_top_left_tmp[0];
1364*c83a76b0SSuyog Pawar pu1_src_top_left[1] = au1_src_top_left_tmp[1];
1365*c83a76b0SSuyog Pawar for(row = 0; row < 2 * ht; row++)
1366*c83a76b0SSuyog Pawar {
1367*c83a76b0SSuyog Pawar pu1_src_left[row] = au1_src_left_tmp[row];
1368*c83a76b0SSuyog Pawar }
1369*c83a76b0SSuyog Pawar for(col = 0; col < wd; col++)
1370*c83a76b0SSuyog Pawar {
1371*c83a76b0SSuyog Pawar pu1_src_top[col] = au1_src_top_tmp[col];
1372*c83a76b0SSuyog Pawar }
1373*c83a76b0SSuyog Pawar
1374*c83a76b0SSuyog Pawar }
1375