xref: /aosp_15_r20/external/libhevc/common/ihevc_itrans_recon_16x16.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar  *******************************************************************************
20*c83a76b0SSuyog Pawar  * @file
21*c83a76b0SSuyog Pawar  *  ihevc_itrans_recon_16x16.c
22*c83a76b0SSuyog Pawar  *
23*c83a76b0SSuyog Pawar  * @brief
24*c83a76b0SSuyog Pawar  *  Contains function definitions for inverse transform and reconstruction of 16x16 blocks
25*c83a76b0SSuyog Pawar  *
26*c83a76b0SSuyog Pawar  *
27*c83a76b0SSuyog Pawar  * @author
28*c83a76b0SSuyog Pawar  *  100470
29*c83a76b0SSuyog Pawar  *
30*c83a76b0SSuyog Pawar  * @par List of Functions:
31*c83a76b0SSuyog Pawar  *  - ihevc_itrans_recon_16x16()
32*c83a76b0SSuyog Pawar  *
33*c83a76b0SSuyog Pawar  * @remarks
34*c83a76b0SSuyog Pawar  *  None
35*c83a76b0SSuyog Pawar  *
36*c83a76b0SSuyog Pawar  *******************************************************************************
37*c83a76b0SSuyog Pawar  */
38*c83a76b0SSuyog Pawar #include <stdio.h>
39*c83a76b0SSuyog Pawar #include <string.h>
40*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
41*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
42*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
43*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
44*c83a76b0SSuyog Pawar #include "ihevc_trans_tables.h"
45*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
46*c83a76b0SSuyog Pawar #include "ihevc_func_selector.h"
47*c83a76b0SSuyog Pawar #include "ihevc_trans_macros.h"
48*c83a76b0SSuyog Pawar 
49*c83a76b0SSuyog Pawar /**
50*c83a76b0SSuyog Pawar  *******************************************************************************
51*c83a76b0SSuyog Pawar  *
52*c83a76b0SSuyog Pawar  * @brief
53*c83a76b0SSuyog Pawar  *  This function performs inverse transform and reconstruction for a 16x16
54*c83a76b0SSuyog Pawar  * input block
55*c83a76b0SSuyog Pawar  *
56*c83a76b0SSuyog Pawar  * @par Description:
57*c83a76b0SSuyog Pawar  *  Performs inverse transform, adds the prediction data and clips the output
58*c83a76b0SSuyog Pawar  * to 8 bit
59*c83a76b0SSuyog Pawar  *
60*c83a76b0SSuyog Pawar  * @param[in] pi2_src
61*c83a76b0SSuyog Pawar  *  Input 16x16 coefficients
62*c83a76b0SSuyog Pawar  *
63*c83a76b0SSuyog Pawar  * @param[in] pi2_tmp
64*c83a76b0SSuyog Pawar  *  Temporary 16x16 buffer for storing the
65*c83a76b0SSuyog Pawar  *  output of the 1st stage of the inverse
66*c83a76b0SSuyog Pawar  *  transform (it is read back as the input
67*c83a76b0SSuyog Pawar  *  of the 2nd stage)
68*c83a76b0SSuyog Pawar  *
69*c83a76b0SSuyog Pawar  * @param[in] pu1_pred
70*c83a76b0SSuyog Pawar  *  Prediction 16x16 block
71*c83a76b0SSuyog Pawar  *
72*c83a76b0SSuyog Pawar  * @param[out] pu1_dst
73*c83a76b0SSuyog Pawar  *  Output 16x16 block
74*c83a76b0SSuyog Pawar  *
75*c83a76b0SSuyog Pawar  * @param[in] src_strd
76*c83a76b0SSuyog Pawar  *  Input stride
77*c83a76b0SSuyog Pawar  *
78*c83a76b0SSuyog Pawar  * @param[in] pred_strd
79*c83a76b0SSuyog Pawar  *  Prediction stride
80*c83a76b0SSuyog Pawar  *
81*c83a76b0SSuyog Pawar  * @param[in] dst_strd
82*c83a76b0SSuyog Pawar  *  Output Stride
83*c83a76b0SSuyog Pawar  *
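84*c83a76b0SSuyog Pawar  * @param[in] zero_cols
85*c83a76b0SSuyog Pawar  *  Bit mask of all-zero columns in pi2_src (bit i set => column i is zero)
86*c83a76b0SSuyog Pawar  *
87*c83a76b0SSuyog Pawar  * @param[in] zero_rows
88*c83a76b0SSuyog Pawar  *  Bit mask of all-zero rows in pi2_src (bit i set => row i is zero)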
89*c83a76b0SSuyog Pawar  *
90*c83a76b0SSuyog Pawar  * @returns  Void
91*c83a76b0SSuyog Pawar  *
92*c83a76b0SSuyog Pawar  * @remarks
93*c83a76b0SSuyog Pawar  *  None
94*c83a76b0SSuyog Pawar  *
95*c83a76b0SSuyog Pawar  *******************************************************************************
96*c83a76b0SSuyog Pawar  */
97*c83a76b0SSuyog Pawar 
ihevc_itrans_recon_16x16(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)98*c83a76b0SSuyog Pawar void ihevc_itrans_recon_16x16(WORD16 *pi2_src,
99*c83a76b0SSuyog Pawar                               WORD16 *pi2_tmp,
100*c83a76b0SSuyog Pawar                               UWORD8 *pu1_pred,
101*c83a76b0SSuyog Pawar                               UWORD8 *pu1_dst,
102*c83a76b0SSuyog Pawar                               WORD32 src_strd,
103*c83a76b0SSuyog Pawar                               WORD32 pred_strd,
104*c83a76b0SSuyog Pawar                               WORD32 dst_strd,
105*c83a76b0SSuyog Pawar                               WORD32 zero_cols,
106*c83a76b0SSuyog Pawar                               WORD32 zero_rows)
107*c83a76b0SSuyog Pawar {
108*c83a76b0SSuyog Pawar     WORD32 j, k;
109*c83a76b0SSuyog Pawar     WORD32 e[8], o[8];
110*c83a76b0SSuyog Pawar     WORD32 ee[4], eo[4];
111*c83a76b0SSuyog Pawar     WORD32 eee[2], eeo[2];
112*c83a76b0SSuyog Pawar     WORD32 add;
113*c83a76b0SSuyog Pawar     WORD32 shift;
114*c83a76b0SSuyog Pawar     WORD16 *pi2_tmp_orig;
115*c83a76b0SSuyog Pawar     WORD32 trans_size;
116*c83a76b0SSuyog Pawar     WORD32 zero_rows_2nd_stage = zero_cols;
117*c83a76b0SSuyog Pawar     WORD32 row_limit_2nd_stage;
118*c83a76b0SSuyog Pawar 
119*c83a76b0SSuyog Pawar     if((zero_cols & 0xFFF0) == 0xFFF0)
120*c83a76b0SSuyog Pawar         row_limit_2nd_stage = 4;
121*c83a76b0SSuyog Pawar     else if((zero_cols & 0xFF00) == 0xFF00)
122*c83a76b0SSuyog Pawar         row_limit_2nd_stage = 8;
123*c83a76b0SSuyog Pawar     else
124*c83a76b0SSuyog Pawar         row_limit_2nd_stage = TRANS_SIZE_16;
125*c83a76b0SSuyog Pawar 
126*c83a76b0SSuyog Pawar     trans_size = TRANS_SIZE_16;
127*c83a76b0SSuyog Pawar     pi2_tmp_orig = pi2_tmp;
128*c83a76b0SSuyog Pawar     if((zero_rows & 0xFFF0) == 0xFFF0)  /* First 4 rows of input are non-zero */
129*c83a76b0SSuyog Pawar     {
130*c83a76b0SSuyog Pawar         /* Inverse Transform 1st stage */
131*c83a76b0SSuyog Pawar         /************************************************************************************************/
132*c83a76b0SSuyog Pawar         /**********************************START - IT_RECON_16x16****************************************/
133*c83a76b0SSuyog Pawar         /************************************************************************************************/
134*c83a76b0SSuyog Pawar 
135*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_1;
136*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
137*c83a76b0SSuyog Pawar 
138*c83a76b0SSuyog Pawar         for(j = 0; j < row_limit_2nd_stage; j++)
139*c83a76b0SSuyog Pawar         {
140*c83a76b0SSuyog Pawar             /* Checking for Zero Cols */
141*c83a76b0SSuyog Pawar             if((zero_cols & 1) == 1)
142*c83a76b0SSuyog Pawar             {
143*c83a76b0SSuyog Pawar                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144*c83a76b0SSuyog Pawar             }
145*c83a76b0SSuyog Pawar             else
146*c83a76b0SSuyog Pawar             {
147*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
149*c83a76b0SSuyog Pawar                 {
150*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
151*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
152*c83a76b0SSuyog Pawar                                                     * pi2_src[3 * src_strd];
153*c83a76b0SSuyog Pawar                 }
154*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
155*c83a76b0SSuyog Pawar                 {
156*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd];
157*c83a76b0SSuyog Pawar                 }
158*c83a76b0SSuyog Pawar                 eeo[0] = 0;
159*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0];
160*c83a76b0SSuyog Pawar                 eeo[1] = 0;
161*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0];
162*c83a76b0SSuyog Pawar 
163*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
164*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
165*c83a76b0SSuyog Pawar                 {
166*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
167*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
168*c83a76b0SSuyog Pawar                 }
169*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
170*c83a76b0SSuyog Pawar                 {
171*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
172*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
173*c83a76b0SSuyog Pawar                 }
174*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
175*c83a76b0SSuyog Pawar                 {
176*c83a76b0SSuyog Pawar                     pi2_tmp[k] =
177*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
178*c83a76b0SSuyog Pawar                     pi2_tmp[k + 8] =
179*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
180*c83a76b0SSuyog Pawar                 }
181*c83a76b0SSuyog Pawar             }
182*c83a76b0SSuyog Pawar             pi2_src++;
183*c83a76b0SSuyog Pawar             pi2_tmp += trans_size;
184*c83a76b0SSuyog Pawar             zero_cols = zero_cols >> 1;
185*c83a76b0SSuyog Pawar         }
186*c83a76b0SSuyog Pawar 
187*c83a76b0SSuyog Pawar         pi2_tmp = pi2_tmp_orig;
188*c83a76b0SSuyog Pawar 
189*c83a76b0SSuyog Pawar         /* Inverse Transform 2nd stage */
190*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_2;
191*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
192*c83a76b0SSuyog Pawar 
193*c83a76b0SSuyog Pawar         if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
194*c83a76b0SSuyog Pawar         {
195*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
196*c83a76b0SSuyog Pawar             {
197*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
198*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
199*c83a76b0SSuyog Pawar                 {
200*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
201*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
202*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size];
203*c83a76b0SSuyog Pawar                 }
204*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
205*c83a76b0SSuyog Pawar                 {
206*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
207*c83a76b0SSuyog Pawar                 }
208*c83a76b0SSuyog Pawar                 eeo[0] = 0;
209*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
210*c83a76b0SSuyog Pawar                 eeo[1] = 0;
211*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
212*c83a76b0SSuyog Pawar 
213*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
214*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
215*c83a76b0SSuyog Pawar                 {
216*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
217*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
218*c83a76b0SSuyog Pawar                 }
219*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
220*c83a76b0SSuyog Pawar                 {
221*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
222*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
223*c83a76b0SSuyog Pawar                 }
224*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
225*c83a76b0SSuyog Pawar                 {
226*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
227*c83a76b0SSuyog Pawar                     itrans_out =
228*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
229*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
230*c83a76b0SSuyog Pawar                     itrans_out =
231*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
232*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
233*c83a76b0SSuyog Pawar                 }
234*c83a76b0SSuyog Pawar                 pi2_tmp++;
235*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
236*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
237*c83a76b0SSuyog Pawar             }
238*c83a76b0SSuyog Pawar         }
239*c83a76b0SSuyog Pawar         else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
240*c83a76b0SSuyog Pawar         {
241*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
242*c83a76b0SSuyog Pawar             {
243*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
244*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
245*c83a76b0SSuyog Pawar                 {
246*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
247*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
248*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
249*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
250*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
251*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
252*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size];
253*c83a76b0SSuyog Pawar                 }
254*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
255*c83a76b0SSuyog Pawar                 {
256*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
257*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
258*c83a76b0SSuyog Pawar                                                     * pi2_tmp[6 * trans_size];
259*c83a76b0SSuyog Pawar                 }
260*c83a76b0SSuyog Pawar                 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
261*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
262*c83a76b0SSuyog Pawar                 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
263*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
264*c83a76b0SSuyog Pawar 
265*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
266*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
267*c83a76b0SSuyog Pawar                 {
268*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
269*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
270*c83a76b0SSuyog Pawar                 }
271*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
272*c83a76b0SSuyog Pawar                 {
273*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
274*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
275*c83a76b0SSuyog Pawar                 }
276*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
277*c83a76b0SSuyog Pawar                 {
278*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
279*c83a76b0SSuyog Pawar                     itrans_out =
280*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
281*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
282*c83a76b0SSuyog Pawar                     itrans_out =
283*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
284*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
285*c83a76b0SSuyog Pawar                 }
286*c83a76b0SSuyog Pawar                 pi2_tmp++;
287*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
288*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
289*c83a76b0SSuyog Pawar             }
290*c83a76b0SSuyog Pawar         }
291*c83a76b0SSuyog Pawar         else /* All rows of output of 1st stage are non-zero */
292*c83a76b0SSuyog Pawar         {
293*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
294*c83a76b0SSuyog Pawar             {
295*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
296*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
297*c83a76b0SSuyog Pawar                 {
298*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
299*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
300*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
301*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
302*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
303*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
304*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size]
305*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[9][k]
306*c83a76b0SSuyog Pawar                                                     * pi2_tmp[9 * trans_size]
307*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[11][k]
308*c83a76b0SSuyog Pawar                                                     * pi2_tmp[11 * trans_size]
309*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[13][k]
310*c83a76b0SSuyog Pawar                                                     * pi2_tmp[13 * trans_size]
311*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[15][k]
312*c83a76b0SSuyog Pawar                                                     * pi2_tmp[15 * trans_size];
313*c83a76b0SSuyog Pawar                 }
314*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
315*c83a76b0SSuyog Pawar                 {
316*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
317*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
318*c83a76b0SSuyog Pawar                                                     * pi2_tmp[6 * trans_size]
319*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[10][k]
320*c83a76b0SSuyog Pawar                                                     * pi2_tmp[10 * trans_size]
321*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[14][k]
322*c83a76b0SSuyog Pawar                                                     * pi2_tmp[14 * trans_size];
323*c83a76b0SSuyog Pawar                 }
324*c83a76b0SSuyog Pawar                 eeo[0] =
325*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
326*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[12][0]
327*c83a76b0SSuyog Pawar                                                                 * pi2_tmp[12
328*c83a76b0SSuyog Pawar                                                                                 * trans_size];
329*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
330*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
331*c83a76b0SSuyog Pawar                 eeo[1] =
332*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
333*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[12][1]
334*c83a76b0SSuyog Pawar                                                                 * pi2_tmp[12
335*c83a76b0SSuyog Pawar                                                                                 * trans_size];
336*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
337*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
338*c83a76b0SSuyog Pawar 
339*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
340*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
341*c83a76b0SSuyog Pawar                 {
342*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
343*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
344*c83a76b0SSuyog Pawar                 }
345*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
346*c83a76b0SSuyog Pawar                 {
347*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
348*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
349*c83a76b0SSuyog Pawar                 }
350*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
351*c83a76b0SSuyog Pawar                 {
352*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
353*c83a76b0SSuyog Pawar                     itrans_out =
354*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
355*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
356*c83a76b0SSuyog Pawar                     itrans_out =
357*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
358*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
359*c83a76b0SSuyog Pawar                 }
360*c83a76b0SSuyog Pawar                 pi2_tmp++;
361*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
362*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
363*c83a76b0SSuyog Pawar             }
364*c83a76b0SSuyog Pawar         }
365*c83a76b0SSuyog Pawar         /************************************************************************************************/
366*c83a76b0SSuyog Pawar         /************************************END - IT_RECON_16x16****************************************/
367*c83a76b0SSuyog Pawar         /************************************************************************************************/
368*c83a76b0SSuyog Pawar     }
369*c83a76b0SSuyog Pawar     else if((zero_rows & 0xFF00) == 0xFF00)  /* First 8 rows of input are non-zero */
370*c83a76b0SSuyog Pawar     {
371*c83a76b0SSuyog Pawar         /* Inverse Transform 1st stage */
372*c83a76b0SSuyog Pawar         /************************************************************************************************/
373*c83a76b0SSuyog Pawar         /**********************************START - IT_RECON_16x16****************************************/
374*c83a76b0SSuyog Pawar         /************************************************************************************************/
375*c83a76b0SSuyog Pawar 
376*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_1;
377*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
378*c83a76b0SSuyog Pawar 
379*c83a76b0SSuyog Pawar         for(j = 0; j < row_limit_2nd_stage; j++)
380*c83a76b0SSuyog Pawar         {
381*c83a76b0SSuyog Pawar             /* Checking for Zero Cols */
382*c83a76b0SSuyog Pawar             if((zero_cols & 1) == 1)
383*c83a76b0SSuyog Pawar             {
384*c83a76b0SSuyog Pawar                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
385*c83a76b0SSuyog Pawar             }
386*c83a76b0SSuyog Pawar             else
387*c83a76b0SSuyog Pawar             {
388*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
389*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
390*c83a76b0SSuyog Pawar                 {
391*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
392*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
393*c83a76b0SSuyog Pawar                                                     * pi2_src[3 * src_strd]
394*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
395*c83a76b0SSuyog Pawar                                                     * pi2_src[5 * src_strd]
396*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
397*c83a76b0SSuyog Pawar                                                     * pi2_src[7 * src_strd];
398*c83a76b0SSuyog Pawar                 }
399*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
400*c83a76b0SSuyog Pawar                 {
401*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
402*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
403*c83a76b0SSuyog Pawar                                                     * pi2_src[6 * src_strd];
404*c83a76b0SSuyog Pawar                 }
405*c83a76b0SSuyog Pawar                 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd];
406*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0];
407*c83a76b0SSuyog Pawar                 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd];
408*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0];
409*c83a76b0SSuyog Pawar 
410*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
411*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
412*c83a76b0SSuyog Pawar                 {
413*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
414*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
415*c83a76b0SSuyog Pawar                 }
416*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
417*c83a76b0SSuyog Pawar                 {
418*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
419*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
420*c83a76b0SSuyog Pawar                 }
421*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
422*c83a76b0SSuyog Pawar                 {
423*c83a76b0SSuyog Pawar                     pi2_tmp[k] =
424*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
425*c83a76b0SSuyog Pawar                     pi2_tmp[k + 8] =
426*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
427*c83a76b0SSuyog Pawar                 }
428*c83a76b0SSuyog Pawar             }
429*c83a76b0SSuyog Pawar             pi2_src++;
430*c83a76b0SSuyog Pawar             pi2_tmp += trans_size;
431*c83a76b0SSuyog Pawar             zero_cols = zero_cols >> 1;
432*c83a76b0SSuyog Pawar         }
433*c83a76b0SSuyog Pawar 
434*c83a76b0SSuyog Pawar         pi2_tmp = pi2_tmp_orig;
435*c83a76b0SSuyog Pawar 
436*c83a76b0SSuyog Pawar         /* Inverse Transform 2nd stage */
437*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_2;
438*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
439*c83a76b0SSuyog Pawar 
440*c83a76b0SSuyog Pawar         if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
441*c83a76b0SSuyog Pawar         {
442*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
443*c83a76b0SSuyog Pawar             {
444*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
445*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
446*c83a76b0SSuyog Pawar                 {
447*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
448*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
449*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size];
450*c83a76b0SSuyog Pawar                 }
451*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
452*c83a76b0SSuyog Pawar                 {
453*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
454*c83a76b0SSuyog Pawar                 }
455*c83a76b0SSuyog Pawar                 eeo[0] = 0;
456*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
457*c83a76b0SSuyog Pawar                 eeo[1] = 0;
458*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
459*c83a76b0SSuyog Pawar 
460*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
461*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
462*c83a76b0SSuyog Pawar                 {
463*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
464*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
465*c83a76b0SSuyog Pawar                 }
466*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
467*c83a76b0SSuyog Pawar                 {
468*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
469*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
470*c83a76b0SSuyog Pawar                 }
471*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
472*c83a76b0SSuyog Pawar                 {
473*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
474*c83a76b0SSuyog Pawar                     itrans_out =
475*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
476*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
477*c83a76b0SSuyog Pawar                     itrans_out =
478*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
479*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
480*c83a76b0SSuyog Pawar                 }
481*c83a76b0SSuyog Pawar                 pi2_tmp++;
482*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
483*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
484*c83a76b0SSuyog Pawar             }
485*c83a76b0SSuyog Pawar         }
486*c83a76b0SSuyog Pawar         else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
487*c83a76b0SSuyog Pawar         {
488*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
489*c83a76b0SSuyog Pawar             {
490*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
491*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
492*c83a76b0SSuyog Pawar                 {
493*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
494*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
495*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
496*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
497*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
498*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
499*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size];
500*c83a76b0SSuyog Pawar                 }
501*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
502*c83a76b0SSuyog Pawar                 {
503*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
504*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
505*c83a76b0SSuyog Pawar                                                     * pi2_tmp[6 * trans_size];
506*c83a76b0SSuyog Pawar                 }
507*c83a76b0SSuyog Pawar                 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
508*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
509*c83a76b0SSuyog Pawar                 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
510*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
511*c83a76b0SSuyog Pawar 
512*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
513*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
514*c83a76b0SSuyog Pawar                 {
515*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
516*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
517*c83a76b0SSuyog Pawar                 }
518*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
519*c83a76b0SSuyog Pawar                 {
520*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
521*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
522*c83a76b0SSuyog Pawar                 }
523*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
524*c83a76b0SSuyog Pawar                 {
525*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
526*c83a76b0SSuyog Pawar                     itrans_out =
527*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
528*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
529*c83a76b0SSuyog Pawar                     itrans_out =
530*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
531*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
532*c83a76b0SSuyog Pawar                 }
533*c83a76b0SSuyog Pawar                 pi2_tmp++;
534*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
535*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
536*c83a76b0SSuyog Pawar             }
537*c83a76b0SSuyog Pawar         }
538*c83a76b0SSuyog Pawar         else /* All rows of output of 1st stage are non-zero */
539*c83a76b0SSuyog Pawar         {
540*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
541*c83a76b0SSuyog Pawar             {
542*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
543*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
544*c83a76b0SSuyog Pawar                 {
545*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
546*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
547*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
548*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
549*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
550*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
551*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size]
552*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[9][k]
553*c83a76b0SSuyog Pawar                                                     * pi2_tmp[9 * trans_size]
554*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[11][k]
555*c83a76b0SSuyog Pawar                                                     * pi2_tmp[11 * trans_size]
556*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[13][k]
557*c83a76b0SSuyog Pawar                                                     * pi2_tmp[13 * trans_size]
558*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[15][k]
559*c83a76b0SSuyog Pawar                                                     * pi2_tmp[15 * trans_size];
560*c83a76b0SSuyog Pawar                 }
561*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
562*c83a76b0SSuyog Pawar                 {
563*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
564*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
565*c83a76b0SSuyog Pawar                                                     * pi2_tmp[6 * trans_size]
566*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[10][k]
567*c83a76b0SSuyog Pawar                                                     * pi2_tmp[10 * trans_size]
568*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[14][k]
569*c83a76b0SSuyog Pawar                                                     * pi2_tmp[14 * trans_size];
570*c83a76b0SSuyog Pawar                 }
571*c83a76b0SSuyog Pawar                 eeo[0] =
572*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
573*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[12][0]
574*c83a76b0SSuyog Pawar                                                                 * pi2_tmp[12
575*c83a76b0SSuyog Pawar                                                                                 * trans_size];
576*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
577*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
578*c83a76b0SSuyog Pawar                 eeo[1] =
579*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
580*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[12][1]
581*c83a76b0SSuyog Pawar                                                                 * pi2_tmp[12
582*c83a76b0SSuyog Pawar                                                                                 * trans_size];
583*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
584*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
585*c83a76b0SSuyog Pawar 
586*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
587*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
588*c83a76b0SSuyog Pawar                 {
589*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
590*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
591*c83a76b0SSuyog Pawar                 }
592*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
593*c83a76b0SSuyog Pawar                 {
594*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
595*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
596*c83a76b0SSuyog Pawar                 }
597*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
598*c83a76b0SSuyog Pawar                 {
599*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
600*c83a76b0SSuyog Pawar                     itrans_out =
601*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
602*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
603*c83a76b0SSuyog Pawar                     itrans_out =
604*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
605*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
606*c83a76b0SSuyog Pawar                 }
607*c83a76b0SSuyog Pawar                 pi2_tmp++;
608*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
609*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
610*c83a76b0SSuyog Pawar             }
611*c83a76b0SSuyog Pawar         }
612*c83a76b0SSuyog Pawar         /************************************************************************************************/
613*c83a76b0SSuyog Pawar         /************************************END - IT_RECON_16x16****************************************/
614*c83a76b0SSuyog Pawar         /************************************************************************************************/
615*c83a76b0SSuyog Pawar     }
616*c83a76b0SSuyog Pawar     else  /* All rows of input are non-zero */
617*c83a76b0SSuyog Pawar     {
618*c83a76b0SSuyog Pawar         /* Inverse Transform 1st stage */
619*c83a76b0SSuyog Pawar         /************************************************************************************************/
620*c83a76b0SSuyog Pawar         /**********************************START - IT_RECON_16x16****************************************/
621*c83a76b0SSuyog Pawar         /************************************************************************************************/
622*c83a76b0SSuyog Pawar 
623*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_1;
624*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
625*c83a76b0SSuyog Pawar 
626*c83a76b0SSuyog Pawar         for(j = 0; j < row_limit_2nd_stage; j++)
627*c83a76b0SSuyog Pawar         {
628*c83a76b0SSuyog Pawar             /* Checking for Zero Cols */
629*c83a76b0SSuyog Pawar             if((zero_cols & 1) == 1)
630*c83a76b0SSuyog Pawar             {
631*c83a76b0SSuyog Pawar                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
632*c83a76b0SSuyog Pawar             }
633*c83a76b0SSuyog Pawar             else
634*c83a76b0SSuyog Pawar             {
635*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
636*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
637*c83a76b0SSuyog Pawar                 {
638*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
639*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
640*c83a76b0SSuyog Pawar                                                     * pi2_src[3 * src_strd]
641*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
642*c83a76b0SSuyog Pawar                                                     * pi2_src[5 * src_strd]
643*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
644*c83a76b0SSuyog Pawar                                                     * pi2_src[7 * src_strd]
645*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[9][k]
646*c83a76b0SSuyog Pawar                                                     * pi2_src[9 * src_strd]
647*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[11][k]
648*c83a76b0SSuyog Pawar                                                     * pi2_src[11 * src_strd]
649*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[13][k]
650*c83a76b0SSuyog Pawar                                                     * pi2_src[13 * src_strd]
651*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[15][k]
652*c83a76b0SSuyog Pawar                                                     * pi2_src[15 * src_strd];
653*c83a76b0SSuyog Pawar                 }
654*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
655*c83a76b0SSuyog Pawar                 {
656*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
657*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
658*c83a76b0SSuyog Pawar                                                     * pi2_src[6 * src_strd]
659*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[10][k]
660*c83a76b0SSuyog Pawar                                                     * pi2_src[10 * src_strd]
661*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[14][k]
662*c83a76b0SSuyog Pawar                                                     * pi2_src[14 * src_strd];
663*c83a76b0SSuyog Pawar                 }
664*c83a76b0SSuyog Pawar                 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]
665*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[12][0]
666*c83a76b0SSuyog Pawar                                                 * pi2_src[12 * src_strd];
667*c83a76b0SSuyog Pawar                 eee[0] =
668*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[0][0] * pi2_src[0]
669*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[8][0]
670*c83a76b0SSuyog Pawar                                                                 * pi2_src[8
671*c83a76b0SSuyog Pawar                                                                                 * src_strd];
672*c83a76b0SSuyog Pawar                 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]
673*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[12][1]
674*c83a76b0SSuyog Pawar                                                 * pi2_src[12 * src_strd];
675*c83a76b0SSuyog Pawar                 eee[1] =
676*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[0][1] * pi2_src[0]
677*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[8][1]
678*c83a76b0SSuyog Pawar                                                                 * pi2_src[8
679*c83a76b0SSuyog Pawar                                                                                 * src_strd];
680*c83a76b0SSuyog Pawar 
681*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
682*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
683*c83a76b0SSuyog Pawar                 {
684*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
685*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
686*c83a76b0SSuyog Pawar                 }
687*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
688*c83a76b0SSuyog Pawar                 {
689*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
690*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
691*c83a76b0SSuyog Pawar                 }
692*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
693*c83a76b0SSuyog Pawar                 {
694*c83a76b0SSuyog Pawar                     pi2_tmp[k] =
695*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
696*c83a76b0SSuyog Pawar                     pi2_tmp[k + 8] =
697*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
698*c83a76b0SSuyog Pawar                 }
699*c83a76b0SSuyog Pawar             }
700*c83a76b0SSuyog Pawar             pi2_src++;
701*c83a76b0SSuyog Pawar             pi2_tmp += trans_size;
702*c83a76b0SSuyog Pawar             zero_cols = zero_cols >> 1;
703*c83a76b0SSuyog Pawar         }
704*c83a76b0SSuyog Pawar 
705*c83a76b0SSuyog Pawar         pi2_tmp = pi2_tmp_orig;
706*c83a76b0SSuyog Pawar 
707*c83a76b0SSuyog Pawar         /* Inverse Transform 2nd stage */
708*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_2;
709*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
710*c83a76b0SSuyog Pawar 
711*c83a76b0SSuyog Pawar         if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
712*c83a76b0SSuyog Pawar         {
713*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
714*c83a76b0SSuyog Pawar             {
715*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
716*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
717*c83a76b0SSuyog Pawar                 {
718*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
719*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
720*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size];
721*c83a76b0SSuyog Pawar                 }
722*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
723*c83a76b0SSuyog Pawar                 {
724*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
725*c83a76b0SSuyog Pawar                 }
726*c83a76b0SSuyog Pawar                 eeo[0] = 0;
727*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
728*c83a76b0SSuyog Pawar                 eeo[1] = 0;
729*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
730*c83a76b0SSuyog Pawar 
731*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
732*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
733*c83a76b0SSuyog Pawar                 {
734*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
735*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
736*c83a76b0SSuyog Pawar                 }
737*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
738*c83a76b0SSuyog Pawar                 {
739*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
740*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
741*c83a76b0SSuyog Pawar                 }
742*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
743*c83a76b0SSuyog Pawar                 {
744*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
745*c83a76b0SSuyog Pawar                     itrans_out =
746*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
747*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
748*c83a76b0SSuyog Pawar                     itrans_out =
749*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
750*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
751*c83a76b0SSuyog Pawar                 }
752*c83a76b0SSuyog Pawar                 pi2_tmp++;
753*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
754*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
755*c83a76b0SSuyog Pawar             }
756*c83a76b0SSuyog Pawar         }
757*c83a76b0SSuyog Pawar         else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
758*c83a76b0SSuyog Pawar         {
759*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
760*c83a76b0SSuyog Pawar             {
761*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
762*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
763*c83a76b0SSuyog Pawar                 {
764*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
765*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
766*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
767*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
768*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
769*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
770*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size];
771*c83a76b0SSuyog Pawar                 }
772*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
773*c83a76b0SSuyog Pawar                 {
774*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
775*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
776*c83a76b0SSuyog Pawar                                                     * pi2_tmp[6 * trans_size];
777*c83a76b0SSuyog Pawar                 }
778*c83a76b0SSuyog Pawar                 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
779*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
780*c83a76b0SSuyog Pawar                 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
781*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
782*c83a76b0SSuyog Pawar 
783*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
784*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
785*c83a76b0SSuyog Pawar                 {
786*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
787*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
788*c83a76b0SSuyog Pawar                 }
789*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
790*c83a76b0SSuyog Pawar                 {
791*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
792*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
793*c83a76b0SSuyog Pawar                 }
794*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
795*c83a76b0SSuyog Pawar                 {
796*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
797*c83a76b0SSuyog Pawar                     itrans_out =
798*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
799*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
800*c83a76b0SSuyog Pawar                     itrans_out =
801*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
802*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
803*c83a76b0SSuyog Pawar                 }
804*c83a76b0SSuyog Pawar                 pi2_tmp++;
805*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
806*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
807*c83a76b0SSuyog Pawar             }
808*c83a76b0SSuyog Pawar         }
809*c83a76b0SSuyog Pawar         else /* All rows of output of 1st stage are non-zero */
810*c83a76b0SSuyog Pawar         {
811*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
812*c83a76b0SSuyog Pawar             {
813*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
814*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
815*c83a76b0SSuyog Pawar                 {
816*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
817*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[3][k]
818*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
819*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[5][k]
820*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
821*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[7][k]
822*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size]
823*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[9][k]
824*c83a76b0SSuyog Pawar                                                     * pi2_tmp[9 * trans_size]
825*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[11][k]
826*c83a76b0SSuyog Pawar                                                     * pi2_tmp[11 * trans_size]
827*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[13][k]
828*c83a76b0SSuyog Pawar                                                     * pi2_tmp[13 * trans_size]
829*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[15][k]
830*c83a76b0SSuyog Pawar                                                     * pi2_tmp[15 * trans_size];
831*c83a76b0SSuyog Pawar                 }
832*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
833*c83a76b0SSuyog Pawar                 {
834*c83a76b0SSuyog Pawar                     eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
835*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[6][k]
836*c83a76b0SSuyog Pawar                                                     * pi2_tmp[6 * trans_size]
837*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[10][k]
838*c83a76b0SSuyog Pawar                                                     * pi2_tmp[10 * trans_size]
839*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_16[14][k]
840*c83a76b0SSuyog Pawar                                                     * pi2_tmp[14 * trans_size];
841*c83a76b0SSuyog Pawar                 }
842*c83a76b0SSuyog Pawar                 eeo[0] =
843*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
844*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[12][0]
845*c83a76b0SSuyog Pawar                                                                 * pi2_tmp[12
846*c83a76b0SSuyog Pawar                                                                                 * trans_size];
847*c83a76b0SSuyog Pawar                 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
848*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
849*c83a76b0SSuyog Pawar                 eeo[1] =
850*c83a76b0SSuyog Pawar                                 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
851*c83a76b0SSuyog Pawar                                                 + g_ai2_ihevc_trans_16[12][1]
852*c83a76b0SSuyog Pawar                                                                 * pi2_tmp[12
853*c83a76b0SSuyog Pawar                                                                                 * trans_size];
854*c83a76b0SSuyog Pawar                 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
855*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
856*c83a76b0SSuyog Pawar 
857*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
858*c83a76b0SSuyog Pawar                 for(k = 0; k < 2; k++)
859*c83a76b0SSuyog Pawar                 {
860*c83a76b0SSuyog Pawar                     ee[k] = eee[k] + eeo[k];
861*c83a76b0SSuyog Pawar                     ee[k + 2] = eee[1 - k] - eeo[1 - k];
862*c83a76b0SSuyog Pawar                 }
863*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
864*c83a76b0SSuyog Pawar                 {
865*c83a76b0SSuyog Pawar                     e[k] = ee[k] + eo[k];
866*c83a76b0SSuyog Pawar                     e[k + 4] = ee[3 - k] - eo[3 - k];
867*c83a76b0SSuyog Pawar                 }
868*c83a76b0SSuyog Pawar                 for(k = 0; k < 8; k++)
869*c83a76b0SSuyog Pawar                 {
870*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
871*c83a76b0SSuyog Pawar                     itrans_out =
872*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
873*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
874*c83a76b0SSuyog Pawar                     itrans_out =
875*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
876*c83a76b0SSuyog Pawar                     pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
877*c83a76b0SSuyog Pawar                 }
878*c83a76b0SSuyog Pawar                 pi2_tmp++;
879*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
880*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
881*c83a76b0SSuyog Pawar             }
882*c83a76b0SSuyog Pawar         }
883*c83a76b0SSuyog Pawar         /************************************************************************************************/
884*c83a76b0SSuyog Pawar         /************************************END - IT_RECON_16x16****************************************/
885*c83a76b0SSuyog Pawar         /************************************************************************************************/
886*c83a76b0SSuyog Pawar     }
887*c83a76b0SSuyog Pawar 
888*c83a76b0SSuyog Pawar }
889*c83a76b0SSuyog Pawar 
890