xref: /aosp_15_r20/external/libhevc/encoder/hme_coarse.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar 
21*c83a76b0SSuyog Pawar /**
22*c83a76b0SSuyog Pawar ******************************************************************************
23*c83a76b0SSuyog Pawar * @file hme_coarse.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * @brief
26*c83a76b0SSuyog Pawar *    Contains ME algorithm for the coarse layer.
27*c83a76b0SSuyog Pawar *
28*c83a76b0SSuyog Pawar * @author
29*c83a76b0SSuyog Pawar *    Ittiam
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * List of Functions
33*c83a76b0SSuyog Pawar * hme_update_mv_bank_coarse()
34*c83a76b0SSuyog Pawar * hme_coarse()
35*c83a76b0SSuyog Pawar ******************************************************************************
36*c83a76b0SSuyog Pawar */
37*c83a76b0SSuyog Pawar 
38*c83a76b0SSuyog Pawar /*****************************************************************************/
39*c83a76b0SSuyog Pawar /* File Includes                                                             */
40*c83a76b0SSuyog Pawar /*****************************************************************************/
41*c83a76b0SSuyog Pawar /* System include files */
42*c83a76b0SSuyog Pawar #include <stdio.h>
43*c83a76b0SSuyog Pawar #include <string.h>
44*c83a76b0SSuyog Pawar #include <stdlib.h>
45*c83a76b0SSuyog Pawar #include <assert.h>
46*c83a76b0SSuyog Pawar #include <stdarg.h>
47*c83a76b0SSuyog Pawar #include <math.h>
48*c83a76b0SSuyog Pawar #include <limits.h>
49*c83a76b0SSuyog Pawar 
50*c83a76b0SSuyog Pawar /* User include files */
51*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
52*c83a76b0SSuyog Pawar #include "itt_video_api.h"
53*c83a76b0SSuyog Pawar #include "ihevce_api.h"
54*c83a76b0SSuyog Pawar 
55*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
56*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
57*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
58*c83a76b0SSuyog Pawar 
59*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
60*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
61*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
62*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
63*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
64*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
65*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
66*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
67*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
69*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
70*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
71*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
72*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
73*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
74*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
75*c83a76b0SSuyog Pawar 
76*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
78*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
79*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
80*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
82*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
83*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
84*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
85*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
86*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
87*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
88*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
89*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
90*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
91*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
92*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
93*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
94*c83a76b0SSuyog Pawar #include "hme_datatype.h"
95*c83a76b0SSuyog Pawar #include "hme_interface.h"
96*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
97*c83a76b0SSuyog Pawar #include "hme_defs.h"
98*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
99*c83a76b0SSuyog Pawar #include "hme_globals.h"
100*c83a76b0SSuyog Pawar #include "hme_utils.h"
101*c83a76b0SSuyog Pawar #include "hme_coarse.h"
102*c83a76b0SSuyog Pawar #include "hme_refine.h"
103*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
104*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
105*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
106*c83a76b0SSuyog Pawar 
107*c83a76b0SSuyog Pawar /*******************************************************************************
108*c83a76b0SSuyog Pawar *                             MACROS
109*c83a76b0SSuyog Pawar *******************************************************************************/
/**
 * Copies one search result into an MV bank slot: both MV components of
 * ps_search_node (each right shifted by `shift`) go to *ps_mv, and the node's
 * reference index goes to *pi1_ref_idx.
 *
 * Every argument is parenthesized and the body is wrapped in do/while(0) so
 * the macro behaves as a single statement (safe inside an unbraced if/else).
 * ps_mv and ps_search_node are expanded more than once, so the arguments
 * passed must not have side effects.
 */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
116*c83a76b0SSuyog Pawar 
117*c83a76b0SSuyog Pawar /*****************************************************************************/
118*c83a76b0SSuyog Pawar /* Function Definitions                                                      */
119*c83a76b0SSuyog Pawar /*****************************************************************************/
120*c83a76b0SSuyog Pawar 
121*c83a76b0SSuyog Pawar /**
122*c83a76b0SSuyog Pawar ********************************************************************************
123*c83a76b0SSuyog Pawar *  @fn     void hme_update_mv_bank_coarse(search_results_t *ps_search_results,
124*c83a76b0SSuyog Pawar *                                   layer_mv_t *ps_layer_mv,
125*c83a76b0SSuyog Pawar *                                   S32 i4_blk_x,
126*c83a76b0SSuyog Pawar *                                   S32 i4_blk_y,
127*c83a76b0SSuyog Pawar *                                   search_node_t *ps_search_node_4x8_l,
128*c83a76b0SSuyog Pawar *                                   search_node_t *ps_search_node_8x4_t,
129*c83a76b0SSuyog Pawar *                                   S08 i1_ref_idx,
130*c83a76b0SSuyog Pawar *                                   mvbank_update_prms_t *ps_prms
131*c83a76b0SSuyog Pawar *
132*c83a76b0SSuyog Pawar *  @brief  Updates the coarse layer MV Bank for a given ref id and blk pos
133*c83a76b0SSuyog Pawar *
134*c83a76b0SSuyog Pawar *  @param[in]  ps_search_results: Search results data structure
135*c83a76b0SSuyog Pawar *
136*c83a76b0SSuyog Pawar *  @param[in, out]  ps_layer_mv : MV Bank for this layer
137*c83a76b0SSuyog Pawar *
138*c83a76b0SSuyog Pawar *  @param[in]  i4_search_blk_x: column number of the 4x4 blk searched
139*c83a76b0SSuyog Pawar *
140*c83a76b0SSuyog Pawar *  @param[in]  i4_search_blk_y: row number of the 4x4 blk searched
141*c83a76b0SSuyog Pawar *
142*c83a76b0SSuyog Pawar *  @param[in]  ps_search_node_4x8_t: Best MV of the 4x8T blk
143*c83a76b0SSuyog Pawar *
144*c83a76b0SSuyog Pawar *  @param[in]  ps_search_node_8x4_l: Best MV of the 8x4L blk
145*c83a76b0SSuyog Pawar *
146*c83a76b0SSuyog Pawar *  @param[in]  i1_ref_idx : Reference ID that has been searched
147*c83a76b0SSuyog Pawar *
148*c83a76b0SSuyog Pawar *  @param[in]  ps_prms : Parameters pertaining to the MV Bank update
149*c83a76b0SSuyog Pawar *
150*c83a76b0SSuyog Pawar *  @return None
151*c83a76b0SSuyog Pawar ********************************************************************************
152*c83a76b0SSuyog Pawar */
hme_update_mv_bank_coarse(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,search_node_t * ps_search_node_4x8_t,search_node_t * ps_search_node_8x4_l,S08 i1_ref_idx,mvbank_update_prms_t * ps_prms)153*c83a76b0SSuyog Pawar void hme_update_mv_bank_coarse(
154*c83a76b0SSuyog Pawar     search_results_t *ps_search_results,
155*c83a76b0SSuyog Pawar     layer_mv_t *ps_layer_mv,
156*c83a76b0SSuyog Pawar     S32 i4_search_blk_x,
157*c83a76b0SSuyog Pawar     S32 i4_search_blk_y,
158*c83a76b0SSuyog Pawar     search_node_t *ps_search_node_4x8_t,
159*c83a76b0SSuyog Pawar     search_node_t *ps_search_node_8x4_l,
160*c83a76b0SSuyog Pawar     S08 i1_ref_idx,
161*c83a76b0SSuyog Pawar     mvbank_update_prms_t *ps_prms)
162*c83a76b0SSuyog Pawar {
163*c83a76b0SSuyog Pawar     /* These point to the MV and ref idx posn to be udpated */
164*c83a76b0SSuyog Pawar     hme_mv_t *ps_mv;
165*c83a76b0SSuyog Pawar     S08 *pi1_ref_idx;
166*c83a76b0SSuyog Pawar 
167*c83a76b0SSuyog Pawar     /* Offset within the bank */
168*c83a76b0SSuyog Pawar     S32 i4_offset;
169*c83a76b0SSuyog Pawar 
170*c83a76b0SSuyog Pawar     S32 i, j, i4_blk_x, i4_blk_y;
171*c83a76b0SSuyog Pawar 
172*c83a76b0SSuyog Pawar     /* Best results for 8x4R and 4x8B blocks */
173*c83a76b0SSuyog Pawar     search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
174*c83a76b0SSuyog Pawar 
175*c83a76b0SSuyog Pawar     /* Number of MVs in a block */
176*c83a76b0SSuyog Pawar     S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref;
177*c83a76b0SSuyog Pawar 
178*c83a76b0SSuyog Pawar     search_node_t *aps_search_nodes[4];
179*c83a76b0SSuyog Pawar 
180*c83a76b0SSuyog Pawar     /* The search blk may be different in size from the blk used to hold MV */
181*c83a76b0SSuyog Pawar     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
182*c83a76b0SSuyog Pawar     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
183*c83a76b0SSuyog Pawar 
184*c83a76b0SSuyog Pawar     /* Compute the offset in the MV bank */
185*c83a76b0SSuyog Pawar     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
186*c83a76b0SSuyog Pawar     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
187*c83a76b0SSuyog Pawar 
188*c83a76b0SSuyog Pawar     /* Identify the correct offset in the mvbank and the reference id buf */
189*c83a76b0SSuyog Pawar     ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx));
190*c83a76b0SSuyog Pawar     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx));
191*c83a76b0SSuyog Pawar 
192*c83a76b0SSuyog Pawar     /*************************************************************************/
193*c83a76b0SSuyog Pawar     /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
194*c83a76b0SSuyog Pawar     /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp.      */
195*c83a76b0SSuyog Pawar     /* If number of results to be stored is 4, then we store all these 4     */
196*c83a76b0SSuyog Pawar     /* results, else we pick best ones                                       */
197*c83a76b0SSuyog Pawar     /*************************************************************************/
198*c83a76b0SSuyog Pawar     ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
199*c83a76b0SSuyog Pawar     ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
200*c83a76b0SSuyog Pawar 
201*c83a76b0SSuyog Pawar     ASSERT(num_mvs <= 4);
202*c83a76b0SSuyog Pawar 
203*c83a76b0SSuyog Pawar     /* Doing this to sort best results */
204*c83a76b0SSuyog Pawar     aps_search_nodes[0] = ps_search_node_8x4_r;
205*c83a76b0SSuyog Pawar     aps_search_nodes[1] = ps_search_node_4x8_b;
206*c83a76b0SSuyog Pawar     aps_search_nodes[2] = ps_search_node_8x4_l;
207*c83a76b0SSuyog Pawar     aps_search_nodes[3] = ps_search_node_4x8_t;
208*c83a76b0SSuyog Pawar     if(num_mvs == 4)
209*c83a76b0SSuyog Pawar     {
210*c83a76b0SSuyog Pawar         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0);
211*c83a76b0SSuyog Pawar         ps_mv++;
212*c83a76b0SSuyog Pawar         pi1_ref_idx++;
213*c83a76b0SSuyog Pawar         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0);
214*c83a76b0SSuyog Pawar         ps_mv++;
215*c83a76b0SSuyog Pawar         pi1_ref_idx++;
216*c83a76b0SSuyog Pawar         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0);
217*c83a76b0SSuyog Pawar         ps_mv++;
218*c83a76b0SSuyog Pawar         pi1_ref_idx++;
219*c83a76b0SSuyog Pawar         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0);
220*c83a76b0SSuyog Pawar         ps_mv++;
221*c83a76b0SSuyog Pawar         pi1_ref_idx++;
222*c83a76b0SSuyog Pawar         return;
223*c83a76b0SSuyog Pawar     }
224*c83a76b0SSuyog Pawar 
225*c83a76b0SSuyog Pawar     /* Run through the results, store them in best to worst order */
226*c83a76b0SSuyog Pawar     for(i = 0; i < num_mvs; i++)
227*c83a76b0SSuyog Pawar     {
228*c83a76b0SSuyog Pawar         for(j = i + 1; j < 4; j++)
229*c83a76b0SSuyog Pawar         {
230*c83a76b0SSuyog Pawar             if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost)
231*c83a76b0SSuyog Pawar             {
232*c83a76b0SSuyog Pawar                 SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *);
233*c83a76b0SSuyog Pawar             }
234*c83a76b0SSuyog Pawar         }
235*c83a76b0SSuyog Pawar         COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0);
236*c83a76b0SSuyog Pawar         ps_mv++;
237*c83a76b0SSuyog Pawar         pi1_ref_idx++;
238*c83a76b0SSuyog Pawar     }
239*c83a76b0SSuyog Pawar }
240*c83a76b0SSuyog Pawar 
241*c83a76b0SSuyog Pawar /**
242*c83a76b0SSuyog Pawar ********************************************************************************
243*c83a76b0SSuyog Pawar *  @fn     void hme_coarse_frm_init(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
244*c83a76b0SSuyog Pawar *
245*c83a76b0SSuyog Pawar *  @brief  Frame init entry point Coarse ME.
246*c83a76b0SSuyog Pawar *
247*c83a76b0SSuyog Pawar *  @param[in,out]  ps_ctxt: ME Handle
248*c83a76b0SSuyog Pawar *
249*c83a76b0SSuyog Pawar *  @param[in]  ps_coarse_prms : Coarse layer config params
250*c83a76b0SSuyog Pawar *
251*c83a76b0SSuyog Pawar *  @return None
252*c83a76b0SSuyog Pawar ********************************************************************************
253*c83a76b0SSuyog Pawar */
hme_coarse_frm_init(coarse_me_ctxt_t * ps_ctxt,coarse_prms_t * ps_coarse_prms)254*c83a76b0SSuyog Pawar void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
255*c83a76b0SSuyog Pawar {
256*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_curr_layer;
257*c83a76b0SSuyog Pawar 
258*c83a76b0SSuyog Pawar     S32 i4_pic_wd, i4_pic_ht;
259*c83a76b0SSuyog Pawar 
260*c83a76b0SSuyog Pawar     S32 num_blks_in_pic, num_blks_in_row;
261*c83a76b0SSuyog Pawar 
262*c83a76b0SSuyog Pawar     BLK_SIZE_T e_search_blk_size = BLK_4x4;
263*c83a76b0SSuyog Pawar 
264*c83a76b0SSuyog Pawar     S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
265*c83a76b0SSuyog Pawar 
266*c83a76b0SSuyog Pawar     /* Number of references to search */
267*c83a76b0SSuyog Pawar     S32 i4_num_ref;
268*c83a76b0SSuyog Pawar 
269*c83a76b0SSuyog Pawar     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
270*c83a76b0SSuyog Pawar     i4_num_ref = ps_coarse_prms->i4_num_ref;
271*c83a76b0SSuyog Pawar 
272*c83a76b0SSuyog Pawar     i4_pic_wd = ps_curr_layer->i4_wd;
273*c83a76b0SSuyog Pawar     i4_pic_ht = ps_curr_layer->i4_ht;
274*c83a76b0SSuyog Pawar     /* Macro updates num_blks_in_pic and num_blks_in_row*/
275*c83a76b0SSuyog Pawar     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
276*c83a76b0SSuyog Pawar 
277*c83a76b0SSuyog Pawar     /************************************************************************/
278*c83a76b0SSuyog Pawar     /* Initialize the mv bank that holds results of this layer.             */
279*c83a76b0SSuyog Pawar     /************************************************************************/
280*c83a76b0SSuyog Pawar     hme_init_mv_bank(
281*c83a76b0SSuyog Pawar         ps_curr_layer,
282*c83a76b0SSuyog Pawar         BLK_4x4,
283*c83a76b0SSuyog Pawar         i4_num_ref,
284*c83a76b0SSuyog Pawar         ps_coarse_prms->num_results,
285*c83a76b0SSuyog Pawar         ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]);
286*c83a76b0SSuyog Pawar 
287*c83a76b0SSuyog Pawar     return;
288*c83a76b0SSuyog Pawar }
289*c83a76b0SSuyog Pawar 
290*c83a76b0SSuyog Pawar /**
291*c83a76b0SSuyog Pawar ********************************************************************************
292*c83a76b0SSuyog Pawar *  @fn    void hme_derive_worst_case_search_range(range_prms_t *ps_range,
293*c83a76b0SSuyog Pawar *                                   range_prms_t *ps_pic_limit,
294*c83a76b0SSuyog Pawar *                                   range_prms_t *ps_mv_limit,
295*c83a76b0SSuyog Pawar *                                   S32 i4_x,
296*c83a76b0SSuyog Pawar *                                   S32 i4_y,
297*c83a76b0SSuyog Pawar *                                   S32 blk_wd,
298*c83a76b0SSuyog Pawar *                                   S32 blk_ht)
299*c83a76b0SSuyog Pawar *
300*c83a76b0SSuyog Pawar *  @brief  given picture limits and blk dimensions and mv search limits, obtains
301*c83a76b0SSuyog Pawar *          teh valid search range such that the blk stays within pic boundaries,
302*c83a76b0SSuyog Pawar *          where picture boundaries include padded portions of picture
303*c83a76b0SSuyog Pawar *
304*c83a76b0SSuyog Pawar *  @param[out] ps_range: updated with actual search range
305*c83a76b0SSuyog Pawar *
306*c83a76b0SSuyog Pawar *  @param[in] ps_pic_limit : picture boundaries
307*c83a76b0SSuyog Pawar *
308*c83a76b0SSuyog Pawar *  @param[in] ps_mv_limit: Search range limits for the mvs
309*c83a76b0SSuyog Pawar *
310*c83a76b0SSuyog Pawar *  @param[in] i4_x : x coordinate of the blk
311*c83a76b0SSuyog Pawar *
312*c83a76b0SSuyog Pawar *  @param[in] i4_y : y coordinate of the blk
313*c83a76b0SSuyog Pawar *
314*c83a76b0SSuyog Pawar *  @param[in] blk_wd : blk width
315*c83a76b0SSuyog Pawar *
316*c83a76b0SSuyog Pawar *  @param[in] blk_ht : blk height
317*c83a76b0SSuyog Pawar *
318*c83a76b0SSuyog Pawar *  @return void
319*c83a76b0SSuyog Pawar ********************************************************************************
320*c83a76b0SSuyog Pawar */
hme_derive_worst_case_search_range(range_prms_t * ps_range,range_prms_t * ps_pic_limit,range_prms_t * ps_mv_limit,S32 i4_x,S32 i4_y,S32 blk_wd,S32 blk_ht)321*c83a76b0SSuyog Pawar void hme_derive_worst_case_search_range(
322*c83a76b0SSuyog Pawar     range_prms_t *ps_range,
323*c83a76b0SSuyog Pawar     range_prms_t *ps_pic_limit,
324*c83a76b0SSuyog Pawar     range_prms_t *ps_mv_limit,
325*c83a76b0SSuyog Pawar     S32 i4_x,
326*c83a76b0SSuyog Pawar     S32 i4_y,
327*c83a76b0SSuyog Pawar     S32 blk_wd,
328*c83a76b0SSuyog Pawar     S32 blk_ht)
329*c83a76b0SSuyog Pawar {
330*c83a76b0SSuyog Pawar     /* Taking max x of left block, min x of current block */
331*c83a76b0SSuyog Pawar     ps_range->i2_max_x =
332*c83a76b0SSuyog Pawar         MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x);
333*c83a76b0SSuyog Pawar     ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
334*c83a76b0SSuyog Pawar     /* Taking max y of top block, min y of current block */
335*c83a76b0SSuyog Pawar     ps_range->i2_max_y =
336*c83a76b0SSuyog Pawar         MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y);
337*c83a76b0SSuyog Pawar     ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
338*c83a76b0SSuyog Pawar }
339*c83a76b0SSuyog Pawar 
340*c83a76b0SSuyog Pawar /**
341*c83a76b0SSuyog Pawar ********************************************************************************
342*c83a76b0SSuyog Pawar * @fn void hme_combine_4x4_sads_and_compute_cost(S08 i1_ref_idx,
343*c83a76b0SSuyog Pawar *                                           range_prms_t *ps_mv_range,
344*c83a76b0SSuyog Pawar *                                           range_prms_t *ps_mv_limit,
345*c83a76b0SSuyog Pawar *                                           hme_mv_t *ps_best_mv_4x8,
346*c83a76b0SSuyog Pawar *                                           hme_mv_t *ps_best_mv_8x4,
347*c83a76b0SSuyog Pawar *                                           pred_ctxt_t *ps_pred_ctxt,
348*c83a76b0SSuyog Pawar *                                           PF_MV_COST_FXN pf_mv_cost_compute,
349*c83a76b0SSuyog Pawar *                                           ME_QUALITY_PRESETS_T e_me_quality_preset,
350*c83a76b0SSuyog Pawar *                                           S16 *pi2_sads_4x4_current,
351*c83a76b0SSuyog Pawar *                                           S16 *pi2_sads_4x4_east,
352*c83a76b0SSuyog Pawar *                                           S16 *pi2_sads_4x4_south,
353*c83a76b0SSuyog Pawar *                                           FILE *fp_dump_sad)
354*c83a76b0SSuyog Pawar *
355*c83a76b0SSuyog Pawar *  @brief  Does a full search on entire srch window with a given step size in coarse layer
356*c83a76b0SSuyog Pawar *
357*c83a76b0SSuyog Pawar *  @param[in] i1_ref_idx : Cur ref idx
358*c83a76b0SSuyog Pawar *
359*c83a76b0SSuyog Pawar *  @param[in] ps_layer_ctxt: All info about this layer
360*c83a76b0SSuyog Pawar *
361*c83a76b0SSuyog Pawar *  @param[out] ps_best_mv  : type hme_mv_t contains best mv x and y
362*c83a76b0SSuyog Pawar *
363*c83a76b0SSuyog Pawar *  @param[in] ps_pred_ctxt : Prediction ctxt for cost computation
364*c83a76b0SSuyog Pawar *
365*c83a76b0SSuyog Pawar *  @param[in] pf_mv_cost_compute : mv cost computation function
366*c83a76b0SSuyog Pawar *
367*c83a76b0SSuyog Pawar *  @return void
368*c83a76b0SSuyog Pawar ********************************************************************************
369*c83a76b0SSuyog Pawar */
hme_combine_4x4_sads_and_compute_cost_high_quality(S08 i1_ref_idx,range_prms_t * ps_mv_range,range_prms_t * ps_mv_limit,hme_mv_t * ps_best_mv_4x8,hme_mv_t * ps_best_mv_8x4,pred_ctxt_t * ps_pred_ctxt,PF_MV_COST_FXN pf_mv_cost_compute,S16 * pi2_sads_4x4_current,S16 * pi2_sads_4x4_east,S16 * pi2_sads_4x4_south)370*c83a76b0SSuyog Pawar void hme_combine_4x4_sads_and_compute_cost_high_quality(
371*c83a76b0SSuyog Pawar     S08 i1_ref_idx,
372*c83a76b0SSuyog Pawar     range_prms_t *ps_mv_range,
373*c83a76b0SSuyog Pawar     range_prms_t *ps_mv_limit,
374*c83a76b0SSuyog Pawar     hme_mv_t *ps_best_mv_4x8,
375*c83a76b0SSuyog Pawar     hme_mv_t *ps_best_mv_8x4,
376*c83a76b0SSuyog Pawar     pred_ctxt_t *ps_pred_ctxt,
377*c83a76b0SSuyog Pawar     PF_MV_COST_FXN pf_mv_cost_compute,
378*c83a76b0SSuyog Pawar     S16 *pi2_sads_4x4_current,
379*c83a76b0SSuyog Pawar     S16 *pi2_sads_4x4_east,
380*c83a76b0SSuyog Pawar     S16 *pi2_sads_4x4_south)
381*c83a76b0SSuyog Pawar {
382*c83a76b0SSuyog Pawar     /* These control number of parts and number of pts in grid to search */
383*c83a76b0SSuyog Pawar     S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
384*c83a76b0SSuyog Pawar     S32 step_shift_x, step_shift_y;
385*c83a76b0SSuyog Pawar     S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
386*c83a76b0SSuyog Pawar 
387*c83a76b0SSuyog Pawar     S32 min_cost_4x8 = MAX_32BIT_VAL;
388*c83a76b0SSuyog Pawar     S32 min_cost_8x4 = MAX_32BIT_VAL;
389*c83a76b0SSuyog Pawar 
390*c83a76b0SSuyog Pawar     search_node_t s_search_node;
391*c83a76b0SSuyog Pawar     s_search_node.i1_ref_idx = i1_ref_idx;
392*c83a76b0SSuyog Pawar 
393*c83a76b0SSuyog Pawar     stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
394*c83a76b0SSuyog Pawar     /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
395*c83a76b0SSuyog Pawar     step_shift_x = step_shift_y = 1;
396*c83a76b0SSuyog Pawar 
397*c83a76b0SSuyog Pawar     mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
398*c83a76b0SSuyog Pawar     mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
399*c83a76b0SSuyog Pawar     mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
400*c83a76b0SSuyog Pawar     mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
401*c83a76b0SSuyog Pawar 
402*c83a76b0SSuyog Pawar     /* Run 2loops to sweep over the reference area */
403*c83a76b0SSuyog Pawar     for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
404*c83a76b0SSuyog Pawar     {
405*c83a76b0SSuyog Pawar         for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
406*c83a76b0SSuyog Pawar         {
407*c83a76b0SSuyog Pawar             S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
408*c83a76b0SSuyog Pawar             S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
409*c83a76b0SSuyog Pawar                           ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
410*c83a76b0SSuyog Pawar 
411*c83a76b0SSuyog Pawar             /* Get SAD by adding SAD for current and neighbour S  */
412*c83a76b0SSuyog Pawar             sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
413*c83a76b0SSuyog Pawar             sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
414*c83a76b0SSuyog Pawar 
415*c83a76b0SSuyog Pawar             //          fprintf(fp_dump_sad,"%d\t",sad);
416*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvx = mvx;
417*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvy = mvy;
418*c83a76b0SSuyog Pawar 
419*c83a76b0SSuyog Pawar             cost_4x8 = cost_8x4 =
420*c83a76b0SSuyog Pawar                 pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL);
421*c83a76b0SSuyog Pawar 
422*c83a76b0SSuyog Pawar             cost_4x8 += sad_4x8;
423*c83a76b0SSuyog Pawar             cost_8x4 += sad_8x4;
424*c83a76b0SSuyog Pawar 
425*c83a76b0SSuyog Pawar             if(cost_4x8 < min_cost_4x8)
426*c83a76b0SSuyog Pawar             {
427*c83a76b0SSuyog Pawar                 best_mv_x_4x8 = mvx;
428*c83a76b0SSuyog Pawar                 best_mv_y_4x8 = mvy;
429*c83a76b0SSuyog Pawar                 min_cost_4x8 = cost_4x8;
430*c83a76b0SSuyog Pawar             }
431*c83a76b0SSuyog Pawar             if(cost_8x4 < min_cost_8x4)
432*c83a76b0SSuyog Pawar             {
433*c83a76b0SSuyog Pawar                 best_mv_x_8x4 = mvx;
434*c83a76b0SSuyog Pawar                 best_mv_y_8x4 = mvy;
435*c83a76b0SSuyog Pawar                 min_cost_8x4 = cost_8x4;
436*c83a76b0SSuyog Pawar             }
437*c83a76b0SSuyog Pawar         }
438*c83a76b0SSuyog Pawar     }
439*c83a76b0SSuyog Pawar 
440*c83a76b0SSuyog Pawar     ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
441*c83a76b0SSuyog Pawar     ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
442*c83a76b0SSuyog Pawar 
443*c83a76b0SSuyog Pawar     ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
444*c83a76b0SSuyog Pawar     ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
445*c83a76b0SSuyog Pawar }
446*c83a76b0SSuyog Pawar 
hme_combine_4x4_sads_and_compute_cost_high_speed(S08 i1_ref_idx,range_prms_t * ps_mv_range,range_prms_t * ps_mv_limit,hme_mv_t * ps_best_mv_4x8,hme_mv_t * ps_best_mv_8x4,pred_ctxt_t * ps_pred_ctxt,PF_MV_COST_FXN pf_mv_cost_compute,S16 * pi2_sads_4x4_current,S16 * pi2_sads_4x4_east,S16 * pi2_sads_4x4_south)447*c83a76b0SSuyog Pawar void hme_combine_4x4_sads_and_compute_cost_high_speed(
448*c83a76b0SSuyog Pawar     S08 i1_ref_idx,
449*c83a76b0SSuyog Pawar     range_prms_t *ps_mv_range,
450*c83a76b0SSuyog Pawar     range_prms_t *ps_mv_limit,
451*c83a76b0SSuyog Pawar     hme_mv_t *ps_best_mv_4x8,
452*c83a76b0SSuyog Pawar     hme_mv_t *ps_best_mv_8x4,
453*c83a76b0SSuyog Pawar     pred_ctxt_t *ps_pred_ctxt,
454*c83a76b0SSuyog Pawar     PF_MV_COST_FXN pf_mv_cost_compute,
455*c83a76b0SSuyog Pawar     S16 *pi2_sads_4x4_current,
456*c83a76b0SSuyog Pawar     S16 *pi2_sads_4x4_east,
457*c83a76b0SSuyog Pawar     S16 *pi2_sads_4x4_south)
458*c83a76b0SSuyog Pawar {
459*c83a76b0SSuyog Pawar     /* These control number of parts and number of pts in grid to search */
460*c83a76b0SSuyog Pawar     S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
461*c83a76b0SSuyog Pawar     S32 step_shift_x, step_shift_y;
462*c83a76b0SSuyog Pawar     S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
463*c83a76b0SSuyog Pawar 
464*c83a76b0SSuyog Pawar     S32 rnd, lambda, lambda_q_shift;
465*c83a76b0SSuyog Pawar 
466*c83a76b0SSuyog Pawar     S32 min_cost_4x8 = MAX_32BIT_VAL;
467*c83a76b0SSuyog Pawar     S32 min_cost_8x4 = MAX_32BIT_VAL;
468*c83a76b0SSuyog Pawar 
469*c83a76b0SSuyog Pawar     (void)pf_mv_cost_compute;
470*c83a76b0SSuyog Pawar     stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
471*c83a76b0SSuyog Pawar     /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
472*c83a76b0SSuyog Pawar     step_shift_x = step_shift_y = 2;
473*c83a76b0SSuyog Pawar 
474*c83a76b0SSuyog Pawar     mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
475*c83a76b0SSuyog Pawar     mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
476*c83a76b0SSuyog Pawar     mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
477*c83a76b0SSuyog Pawar     mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
478*c83a76b0SSuyog Pawar 
479*c83a76b0SSuyog Pawar     lambda = ps_pred_ctxt->lambda;
480*c83a76b0SSuyog Pawar     lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
481*c83a76b0SSuyog Pawar     rnd = 1 << (lambda_q_shift - 1);
482*c83a76b0SSuyog Pawar 
483*c83a76b0SSuyog Pawar     ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
484*c83a76b0SSuyog Pawar     ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));
485*c83a76b0SSuyog Pawar 
486*c83a76b0SSuyog Pawar     /* Run 2loops to sweep over the reference area */
487*c83a76b0SSuyog Pawar     for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
488*c83a76b0SSuyog Pawar     {
489*c83a76b0SSuyog Pawar         for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
490*c83a76b0SSuyog Pawar         {
491*c83a76b0SSuyog Pawar             S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
492*c83a76b0SSuyog Pawar 
493*c83a76b0SSuyog Pawar             S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
494*c83a76b0SSuyog Pawar                           ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
495*c83a76b0SSuyog Pawar 
496*c83a76b0SSuyog Pawar             /* Get SAD by adding SAD for current and neighbour S  */
497*c83a76b0SSuyog Pawar             sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
498*c83a76b0SSuyog Pawar             sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
499*c83a76b0SSuyog Pawar 
500*c83a76b0SSuyog Pawar             //          fprintf(fp_dump_sad,"%d\t",sad);
501*c83a76b0SSuyog Pawar 
502*c83a76b0SSuyog Pawar             cost_4x8 = cost_8x4 =
503*c83a76b0SSuyog Pawar                 (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx;
504*c83a76b0SSuyog Pawar 
505*c83a76b0SSuyog Pawar             cost_4x8 += (mvx != 0) ? 1 : 0;
506*c83a76b0SSuyog Pawar             cost_4x8 += (mvy != 0) ? 1 : 0;
507*c83a76b0SSuyog Pawar             cost_4x8 = (cost_4x8 * lambda + rnd) >> lambda_q_shift;
508*c83a76b0SSuyog Pawar 
509*c83a76b0SSuyog Pawar             cost_8x4 += (mvx != 0) ? 1 : 0;
510*c83a76b0SSuyog Pawar             cost_8x4 += (mvy != 0) ? 1 : 0;
511*c83a76b0SSuyog Pawar             cost_8x4 = (cost_8x4 * lambda + rnd) >> lambda_q_shift;
512*c83a76b0SSuyog Pawar 
513*c83a76b0SSuyog Pawar             cost_4x8 += sad_4x8;
514*c83a76b0SSuyog Pawar             cost_8x4 += sad_8x4;
515*c83a76b0SSuyog Pawar 
516*c83a76b0SSuyog Pawar             if(cost_4x8 < min_cost_4x8)
517*c83a76b0SSuyog Pawar             {
518*c83a76b0SSuyog Pawar                 best_mv_x_4x8 = mvx;
519*c83a76b0SSuyog Pawar                 best_mv_y_4x8 = mvy;
520*c83a76b0SSuyog Pawar                 min_cost_4x8 = cost_4x8;
521*c83a76b0SSuyog Pawar             }
522*c83a76b0SSuyog Pawar             if(cost_8x4 < min_cost_8x4)
523*c83a76b0SSuyog Pawar             {
524*c83a76b0SSuyog Pawar                 best_mv_x_8x4 = mvx;
525*c83a76b0SSuyog Pawar                 best_mv_y_8x4 = mvy;
526*c83a76b0SSuyog Pawar                 min_cost_8x4 = cost_8x4;
527*c83a76b0SSuyog Pawar             }
528*c83a76b0SSuyog Pawar         }
529*c83a76b0SSuyog Pawar     }
530*c83a76b0SSuyog Pawar 
531*c83a76b0SSuyog Pawar     ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
532*c83a76b0SSuyog Pawar     ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
533*c83a76b0SSuyog Pawar 
534*c83a76b0SSuyog Pawar     ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
535*c83a76b0SSuyog Pawar     ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
536*c83a76b0SSuyog Pawar }
537*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_store_4x4_sads_high_quality(hme_search_prms_t *ps_search_prms,
*                               layer_ctxt_t *ps_layer_ctxt,
*                               range_prms_t *ps_mv_limit,
*                               wgt_pred_ctxt_t *ps_wt_inp_prms,
*                               S16 *pi2_sads_4x4)
*          hme_store_4x4_sads_high_speed() takes the same arguments.
*
*  @brief  Does a 4x4 sad computation on a given range and stores it in memory
*
*  @param[in] ps_search_prms : Search prms structure containing info like
*               blk offsets, reference index, search range etc
*
*  @param[in] ps_layer_ctxt: All info about this layer
*
*  @param[in] ps_mv_limit: MV limits from which the offset and the row width
*               of the SAD storage index are derived
*
*  @param[in] ps_wt_inp_prms: All info about weighted input
*
*  @param[out] pi2_sads_4x4: Memory to store all 4x4 SADs for given range
*
*  @note   The motion estimation quality preset is not an argument; it is
*          reflected in which variant is called. The _high_quality and
*          _high_speed variants differ in the step size used for the sweep.
*
*  @return void
********************************************************************************
*/
559*c83a76b0SSuyog Pawar 
/* High quality variant: sweeps the mv range in steps of                     */
/* HME_COARSE_STEP_SIZE_HIGH_QUALITY and stores one 4x4 SAD per position     */
void hme_store_4x4_sads_high_quality(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    range_prms_t *ps_mv_limit,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S16 *pi2_sads_4x4)
{
    /* Reference index being searched; also selects the weighted input */
    const S32 ref_id = ps_search_prms->i1_ref_idx;

    /* Entry 0 of the mv range array is used for every reference index */
    range_prms_t *ps_sweep = ps_search_prms->aps_mv_range[0];

    /* Strides of the weighted input and of the reference (layer input) */
    const S32 inp_strd = ps_search_prms->i4_inp_stride;
    const S32 ref_strd = ps_layer_ctxt->i4_inp_stride;

    /* Top left sample of the search blk inside the weighted input buffer */
    U08 *pu1_blk = ps_wt_inp_prms->apu1_wt_inp[ref_id] + ps_search_prms->i4_cu_x_off +
                   (ps_search_prms->i4_cu_y_off * inp_strd);

    /* Colocated position of the blk inside the reference picture */
    const S32 coloc_off = (ref_strd * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    U08 *pu1_coloc = ps_layer_ctxt->ppu1_list_inp[ref_id] + coloc_off;

    /* Same step and shift are used horizontally and vertically */
    const S32 step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
    const S32 shift = 1;

    /* Biases that map (mv >> shift) to a non negative row/column number,    */
    /* and the number of entries per row of the SAD storage                  */
    const S32 col_bias = -(ps_mv_limit->i2_min_x >> shift);
    const S32 row_bias = -(ps_mv_limit->i2_min_y >> shift);
    const S32 row_pitch = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> shift;

    S32 mvx, mvy;

    /* Outer loop walks mv rows, inner loop walks mv columns */
    for(mvy = ps_sweep->i2_min_y; mvy < ps_sweep->i2_max_y; mvy += step)
    {
        /* Index of the first entry of this mv row in the SAD storage */
        const S32 row_base = ((mvy >> shift) + row_bias) * row_pitch;

        for(mvx = ps_sweep->i2_min_x; mvx < ps_sweep->i2_max_x; mvx += step)
        {
            const U08 *pu1_src = pu1_blk;
            const U08 *pu1_cand = pu1_coloc + mvx + (mvy * ref_strd);
            S32 acc = 0;
            S32 row, col;

            /* 4x4 sum of absolute differences between input and candidate */
            for(row = 0; row < 4; row++, pu1_src += inp_strd, pu1_cand += ref_strd)
            {
                for(col = 0; col < 4; col++)
                {
                    acc += (ABS(((S32)pu1_src[col] - (S32)pu1_cand[col])));
                }
            }

            pi2_sads_4x4[((mvx >> shift) + col_bias) + row_base] = acc;
        }
    }
}
642*c83a76b0SSuyog Pawar 
/* High speed variant: sweeps the mv range in steps of                       */
/* HME_COARSE_STEP_SIZE_HIGH_SPEED and stores one 4x4 SAD per position       */
void hme_store_4x4_sads_high_speed(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    range_prms_t *ps_mv_limit,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S16 *pi2_sads_4x4)
{
    /* Reference index being searched; also selects the weighted input */
    const S32 ref_id = ps_search_prms->i1_ref_idx;

    /* Entry 0 of the mv range array is used for every reference index */
    range_prms_t *ps_sweep = ps_search_prms->aps_mv_range[0];

    /* Strides of the weighted input and of the reference (layer input) */
    const S32 inp_strd = ps_search_prms->i4_inp_stride;
    const S32 ref_strd = ps_layer_ctxt->i4_inp_stride;

    /* Top left sample of the search blk inside the weighted input buffer */
    U08 *pu1_blk = ps_wt_inp_prms->apu1_wt_inp[ref_id] + ps_search_prms->i4_cu_x_off +
                   (ps_search_prms->i4_cu_y_off * inp_strd);

    /* Colocated position of the blk inside the reference picture */
    const S32 coloc_off = (ref_strd * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    U08 *pu1_coloc = ps_layer_ctxt->ppu1_list_inp[ref_id] + coloc_off;

    /* Same step and shift are used horizontally and vertically */
    const S32 step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
    const S32 shift = 2;

    /* Biases that map (mv >> shift) to a non negative row/column number,    */
    /* and the number of entries per row of the SAD storage                  */
    const S32 col_bias = -(ps_mv_limit->i2_min_x >> shift);
    const S32 row_bias = -(ps_mv_limit->i2_min_y >> shift);
    const S32 row_pitch = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> shift;

    S32 mvx, mvy;

    /* Outer loop walks mv rows, inner loop walks mv columns */
    for(mvy = ps_sweep->i2_min_y; mvy < ps_sweep->i2_max_y; mvy += step)
    {
        /* Index of the first entry of this mv row in the SAD storage */
        const S32 row_base = ((mvy >> shift) + row_bias) * row_pitch;

        for(mvx = ps_sweep->i2_min_x; mvx < ps_sweep->i2_max_x; mvx += step)
        {
            const U08 *pu1_src = pu1_blk;
            const U08 *pu1_cand = pu1_coloc + mvx + (mvy * ref_strd);
            S32 acc = 0;
            S32 row, col;

            /* 4x4 sum of absolute differences between input and candidate */
            for(row = 0; row < 4; row++, pu1_src += inp_strd, pu1_cand += ref_strd)
            {
                for(col = 0; col < 4; col++)
                {
                    acc += (ABS(((S32)pu1_src[col] - (S32)pu1_cand[col])));
                }
            }

            pi2_sads_4x4[((mvx >> shift) + col_bias) + row_base] = acc;
        }
    }
}
726*c83a76b0SSuyog Pawar /**
727*c83a76b0SSuyog Pawar ********************************************************************************
728*c83a76b0SSuyog Pawar *  @fn     void hme_coarsest(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
729*c83a76b0SSuyog Pawar *
730*c83a76b0SSuyog Pawar *  @brief  Top level entry point for Coarse ME. Runs across blks and searches
731*c83a76b0SSuyog Pawar *          at a 4x4 blk granularity by using 4x8 and 8x4 patterns.
732*c83a76b0SSuyog Pawar *
733*c83a76b0SSuyog Pawar *  @param[in,out]  ps_ctxt: ME Handle
734*c83a76b0SSuyog Pawar *
735*c83a76b0SSuyog Pawar *  @param[in]  ps_coarse_prms : Coarse layer config params
736*c83a76b0SSuyog Pawar *
737*c83a76b0SSuyog Pawar *  @param[in]  ps_multi_thrd_ctxt : Multi thread context
738*c83a76b0SSuyog Pawar *
739*c83a76b0SSuyog Pawar *  @return None
740*c83a76b0SSuyog Pawar ********************************************************************************
741*c83a76b0SSuyog Pawar */
hme_coarsest(coarse_me_ctxt_t * ps_ctxt,coarse_prms_t * ps_coarse_prms,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)742*c83a76b0SSuyog Pawar void hme_coarsest(
743*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt,
744*c83a76b0SSuyog Pawar     coarse_prms_t *ps_coarse_prms,
745*c83a76b0SSuyog Pawar     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
746*c83a76b0SSuyog Pawar     WORD32 i4_ping_pong,
747*c83a76b0SSuyog Pawar     void **ppv_dep_mngr_hme_sync)
748*c83a76b0SSuyog Pawar {
749*c83a76b0SSuyog Pawar     S16 *pi2_cur_ref_sads_4x4;
750*c83a76b0SSuyog Pawar     S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF];
751*c83a76b0SSuyog Pawar     S32 num_rows_coarse;
752*c83a76b0SSuyog Pawar     S32 sad_top_offset, sad_current_offset;
753*c83a76b0SSuyog Pawar     S32 search_node_top_offset, search_node_left_offset;
754*c83a76b0SSuyog Pawar 
755*c83a76b0SSuyog Pawar     ME_QUALITY_PRESETS_T e_me_quality_preset =
756*c83a76b0SSuyog Pawar         ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
757*c83a76b0SSuyog Pawar 
758*c83a76b0SSuyog Pawar     search_results_t *ps_search_results;
759*c83a76b0SSuyog Pawar     mvbank_update_prms_t s_mv_update_prms;
760*c83a76b0SSuyog Pawar     BLK_SIZE_T e_search_blk_size = BLK_4x4;
761*c83a76b0SSuyog Pawar     hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4;
762*c83a76b0SSuyog Pawar 
763*c83a76b0SSuyog Pawar     S32 global_id_8x4, global_id_4x8;
764*c83a76b0SSuyog Pawar 
765*c83a76b0SSuyog Pawar     /*************************************************************************/
766*c83a76b0SSuyog Pawar     /* These directly point to the best search result nodes that will be     */
767*c83a76b0SSuyog Pawar     /* updated by the search algorithm, rather than have to go through an    */
768*c83a76b0SSuyog Pawar     /* elaborate structure                                                   */
769*c83a76b0SSuyog Pawar     /*************************************************************************/
770*c83a76b0SSuyog Pawar     search_node_t *aps_best_search_node_8x4[MAX_NUM_REF];
771*c83a76b0SSuyog Pawar     search_node_t *aps_best_search_node_4x8[MAX_NUM_REF];
772*c83a76b0SSuyog Pawar 
773*c83a76b0SSuyog Pawar     /* These point to various spatial candts */
774*c83a76b0SSuyog Pawar     search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl;
775*c83a76b0SSuyog Pawar     search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl;
776*c83a76b0SSuyog Pawar     search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8;
777*c83a76b0SSuyog Pawar     search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8;
778*c83a76b0SSuyog Pawar     search_node_t as_top_neighbours[4], as_left_neighbours[3];
779*c83a76b0SSuyog Pawar 
780*c83a76b0SSuyog Pawar     /* Holds the global mv for a given ref index */
781*c83a76b0SSuyog Pawar     search_node_t s_candt_global[MAX_NUM_REF];
782*c83a76b0SSuyog Pawar 
783*c83a76b0SSuyog Pawar     /* All the search candidates */
784*c83a76b0SSuyog Pawar     search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS];
785*c83a76b0SSuyog Pawar     search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS];
786*c83a76b0SSuyog Pawar     search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8;
787*c83a76b0SSuyog Pawar 
788*c83a76b0SSuyog Pawar     /* Actual range per blk and the pic level boundaries */
789*c83a76b0SSuyog Pawar     range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF];
790*c83a76b0SSuyog Pawar 
791*c83a76b0SSuyog Pawar     /* Current and prev pic layer ctxt at the coarsest layer */
792*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
793*c83a76b0SSuyog Pawar 
794*c83a76b0SSuyog Pawar     /* best mv of full search */
795*c83a76b0SSuyog Pawar     hme_mv_t best_mv_4x8, best_mv_8x4;
796*c83a76b0SSuyog Pawar 
797*c83a76b0SSuyog Pawar     /* Book keeping at blk level */
798*c83a76b0SSuyog Pawar     S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row;
799*c83a76b0SSuyog Pawar 
800*c83a76b0SSuyog Pawar     S32 blk_y;
801*c83a76b0SSuyog Pawar 
802*c83a76b0SSuyog Pawar     /* Block dimensions */
803*c83a76b0SSuyog Pawar     S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
804*c83a76b0SSuyog Pawar 
805*c83a76b0SSuyog Pawar     S32 lambda = ps_coarse_prms->lambda;
806*c83a76b0SSuyog Pawar 
807*c83a76b0SSuyog Pawar     /* Number of references to search */
808*c83a76b0SSuyog Pawar     S32 i4_num_ref;
809*c83a76b0SSuyog Pawar 
810*c83a76b0SSuyog Pawar     S32 i4_i, id, i;
811*c83a76b0SSuyog Pawar     S08 i1_ref_idx;
812*c83a76b0SSuyog Pawar 
813*c83a76b0SSuyog Pawar     S32 i4_pic_wd, i4_pic_ht;
814*c83a76b0SSuyog Pawar     S32 i4_layer_id;
815*c83a76b0SSuyog Pawar 
816*c83a76b0SSuyog Pawar     S32 end_of_frame;
817*c83a76b0SSuyog Pawar 
818*c83a76b0SSuyog Pawar     pf_get_wt_inp fp_get_wt_inp;
819*c83a76b0SSuyog Pawar 
820*c83a76b0SSuyog Pawar     /* Maximum search iterations around any candidate */
821*c83a76b0SSuyog Pawar     S32 i4_max_iters = ps_coarse_prms->i4_max_iters;
822*c83a76b0SSuyog Pawar 
823*c83a76b0SSuyog Pawar     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
824*c83a76b0SSuyog Pawar     ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id);
825*c83a76b0SSuyog Pawar 
826*c83a76b0SSuyog Pawar     /* We need only one instance of search results structure */
827*c83a76b0SSuyog Pawar     ps_search_results = &ps_ctxt->s_search_results_8x8;
828*c83a76b0SSuyog Pawar 
829*c83a76b0SSuyog Pawar     ps_search_candts_8x4 = &as_search_candts_8x4[0];
830*c83a76b0SSuyog Pawar     ps_search_candts_4x8 = &as_search_candts_4x8[0];
831*c83a76b0SSuyog Pawar 
832*c83a76b0SSuyog Pawar     end_of_frame = 0;
833*c83a76b0SSuyog Pawar 
834*c83a76b0SSuyog Pawar     i4_pic_wd = ps_curr_layer->i4_wd;
835*c83a76b0SSuyog Pawar     i4_pic_ht = ps_curr_layer->i4_ht;
836*c83a76b0SSuyog Pawar 
837*c83a76b0SSuyog Pawar     fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
838*c83a76b0SSuyog Pawar                         ->pf_get_wt_inp_8x8;
839*c83a76b0SSuyog Pawar 
840*c83a76b0SSuyog Pawar     num_rows_coarse = ps_ctxt->i4_num_row_bufs;
841*c83a76b0SSuyog Pawar 
842*c83a76b0SSuyog Pawar     /*************************************************************************/
843*c83a76b0SSuyog Pawar     /* Coarse Layer always does explicit search. Number of reference frames  */
844*c83a76b0SSuyog Pawar     /* to search is a configurable parameter supplied by the application     */
845*c83a76b0SSuyog Pawar     /*************************************************************************/
846*c83a76b0SSuyog Pawar     i4_num_ref = ps_coarse_prms->i4_num_ref;
847*c83a76b0SSuyog Pawar     i4_layer_id = ps_coarse_prms->i4_layer_id;
848*c83a76b0SSuyog Pawar 
849*c83a76b0SSuyog Pawar     /*************************************************************************/
850*c83a76b0SSuyog Pawar     /*  The search algorithm goes as follows:                                */
851*c83a76b0SSuyog Pawar     /*                                                                       */
852*c83a76b0SSuyog Pawar     /*          ___                                                          */
853*c83a76b0SSuyog Pawar     /*         | e |                                                         */
854*c83a76b0SSuyog Pawar     /*      ___|___|___                                                      */
855*c83a76b0SSuyog Pawar     /*     | c | a | b |                                                     */
856*c83a76b0SSuyog Pawar     /*     |___|___|___|                                                     */
857*c83a76b0SSuyog Pawar     /*         | d |                                                         */
858*c83a76b0SSuyog Pawar     /*         |___|                                                         */
859*c83a76b0SSuyog Pawar     /*                                                                       */
860*c83a76b0SSuyog Pawar     /* For the target block a, we collect best results from 2 8x4 blks       */
861*c83a76b0SSuyog Pawar     /* These are c-a and a-b. The 4x8 blks are e-a and a-d                   */
862*c83a76b0SSuyog Pawar     /* c-a result is already available from results of blk c. a-b is         */
863*c83a76b0SSuyog Pawar     /* evaluated in this blk. Likewise e-a result is stored in a row buffer  */
864*c83a76b0SSuyog Pawar     /* a-d is evaluated this blk                                             */
865*c83a76b0SSuyog Pawar     /* So we store a row buffer which stores best 4x8 results of all top blk */
866*c83a76b0SSuyog Pawar     /*************************************************************************/
867*c83a76b0SSuyog Pawar 
868*c83a76b0SSuyog Pawar     /************************************************************************/
869*c83a76b0SSuyog Pawar     /* Initialize the pointers to the best node.                            */
870*c83a76b0SSuyog Pawar     /************************************************************************/
871*c83a76b0SSuyog Pawar     for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
872*c83a76b0SSuyog Pawar     {
873*c83a76b0SSuyog Pawar         aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B];
874*c83a76b0SSuyog Pawar         aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R];
875*c83a76b0SSuyog Pawar     }
876*c83a76b0SSuyog Pawar 
877*c83a76b0SSuyog Pawar     /************************************************************************/
878*c83a76b0SSuyog Pawar     /* Initialize the "searchresults" structure. This will set up the number*/
879*c83a76b0SSuyog Pawar     /* of search types, result updates etc                                  */
880*c83a76b0SSuyog Pawar     /************************************************************************/
881*c83a76b0SSuyog Pawar     {
882*c83a76b0SSuyog Pawar         S32 num_results_per_part;
883*c83a76b0SSuyog Pawar         /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and     */
884*c83a76b0SSuyog Pawar         /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to   */
885*c83a76b0SSuyog Pawar         /* only evaluate 1 result per part. In the coarse layer, we are      */
886*c83a76b0SSuyog Pawar         /* limited to 2 results max per part, and max of 8 results.          */
887*c83a76b0SSuyog Pawar         num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2;
888*c83a76b0SSuyog Pawar         hme_init_search_results(
889*c83a76b0SSuyog Pawar             ps_search_results,
890*c83a76b0SSuyog Pawar             i4_num_ref,
891*c83a76b0SSuyog Pawar             ps_coarse_prms->num_results,
892*c83a76b0SSuyog Pawar             num_results_per_part,
893*c83a76b0SSuyog Pawar             BLK_8x8,
894*c83a76b0SSuyog Pawar             0,
895*c83a76b0SSuyog Pawar             0,
896*c83a76b0SSuyog Pawar             ps_ctxt->au1_is_past);
897*c83a76b0SSuyog Pawar     }
898*c83a76b0SSuyog Pawar 
899*c83a76b0SSuyog Pawar     /* Macro updates num_blks_in_pic and num_blks_in_row*/
900*c83a76b0SSuyog Pawar     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
901*c83a76b0SSuyog Pawar 
902*c83a76b0SSuyog Pawar     num_4x4_blks_in_row = num_blks_in_row + 1;
903*c83a76b0SSuyog Pawar 
904*c83a76b0SSuyog Pawar     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
905*c83a76b0SSuyog Pawar     s_mv_update_prms.i4_num_ref = i4_num_ref;
906*c83a76b0SSuyog Pawar     s_mv_update_prms.i4_shift = 0;
907*c83a76b0SSuyog Pawar 
908*c83a76b0SSuyog Pawar     /* For full search, support 2 or 4 step size */
909*c83a76b0SSuyog Pawar     if(ps_coarse_prms->do_full_search)
910*c83a76b0SSuyog Pawar     {
911*c83a76b0SSuyog Pawar         ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4));
912*c83a76b0SSuyog Pawar     }
913*c83a76b0SSuyog Pawar 
914*c83a76b0SSuyog Pawar     for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
915*c83a76b0SSuyog Pawar     {
916*c83a76b0SSuyog Pawar         S32 blk, delta_poc;
917*c83a76b0SSuyog Pawar         S32 mv_x_clip, mv_y_clip;
918*c83a76b0SSuyog Pawar         /* Initialize only the first row */
919*c83a76b0SSuyog Pawar         for(blk = 0; blk < num_blks_in_row; blk++)
920*c83a76b0SSuyog Pawar         {
921*c83a76b0SSuyog Pawar             INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i);
922*c83a76b0SSuyog Pawar         }
923*c83a76b0SSuyog Pawar 
924*c83a76b0SSuyog Pawar         delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]);
925*c83a76b0SSuyog Pawar 
926*c83a76b0SSuyog Pawar         /* Setting search range for different references based on the delta poc */
927*c83a76b0SSuyog Pawar         /*************************************************************************/
928*c83a76b0SSuyog Pawar         /* set the MV limit per ref. pic.                                        */
929*c83a76b0SSuyog Pawar         /*    - P pic. : Based on the config params.                             */
930*c83a76b0SSuyog Pawar         /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
931*c83a76b0SSuyog Pawar         /*************************************************************************/
932*c83a76b0SSuyog Pawar         {
933*c83a76b0SSuyog Pawar             /* TO DO : Remove hard coding of P-P dist. of 4 */
934*c83a76b0SSuyog Pawar             mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4;
935*c83a76b0SSuyog Pawar 
936*c83a76b0SSuyog Pawar             /* Only for B/b pic. */
937*c83a76b0SSuyog Pawar             if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
938*c83a76b0SSuyog Pawar             {
939*c83a76b0SSuyog Pawar                 WORD16 i2_mv_y_per_poc;
940*c83a76b0SSuyog Pawar 
941*c83a76b0SSuyog Pawar                 /* Get abs MAX for symmetric search */
942*c83a76b0SSuyog Pawar                 i2_mv_y_per_poc =
943*c83a76b0SSuyog Pawar                     MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
944*c83a76b0SSuyog Pawar                         (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id])));
945*c83a76b0SSuyog Pawar 
946*c83a76b0SSuyog Pawar                 mv_y_clip = i2_mv_y_per_poc * delta_poc;
947*c83a76b0SSuyog Pawar             }
948*c83a76b0SSuyog Pawar             /* Set the Config. File Params for P pic. */
949*c83a76b0SSuyog Pawar             else
950*c83a76b0SSuyog Pawar             {
951*c83a76b0SSuyog Pawar                 /* TO DO : Remove hard coding of P-P dist. of 4 */
952*c83a76b0SSuyog Pawar                 mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4;
953*c83a76b0SSuyog Pawar             }
954*c83a76b0SSuyog Pawar 
955*c83a76b0SSuyog Pawar             /* Making mv_x and mv_y range multiple of 4 */
956*c83a76b0SSuyog Pawar             mv_x_clip = (((mv_x_clip + 3) >> 2) << 2);
957*c83a76b0SSuyog Pawar             mv_y_clip = (((mv_y_clip + 3) >> 2) << 2);
958*c83a76b0SSuyog Pawar             /* Clipping the range of mv_x and mv_y */
959*c83a76b0SSuyog Pawar             mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER);
960*c83a76b0SSuyog Pawar             mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER);
961*c83a76b0SSuyog Pawar 
962*c83a76b0SSuyog Pawar             as_mv_limit[i4_i].i2_min_x = -mv_x_clip;
963*c83a76b0SSuyog Pawar             as_mv_limit[i4_i].i2_min_y = -mv_y_clip;
964*c83a76b0SSuyog Pawar             as_mv_limit[i4_i].i2_max_x = mv_x_clip;
965*c83a76b0SSuyog Pawar             as_mv_limit[i4_i].i2_max_y = mv_y_clip;
966*c83a76b0SSuyog Pawar         }
967*c83a76b0SSuyog Pawar         /*Populating SAD block size based on search range */
968*c83a76b0SSuyog Pawar         ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) *
969*c83a76b0SSuyog Pawar                                        ((2 * mv_y_clip) / ps_coarse_prms->full_search_step);
970*c83a76b0SSuyog Pawar         ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i];
971*c83a76b0SSuyog Pawar     }
972*c83a76b0SSuyog Pawar 
973*c83a76b0SSuyog Pawar     for(i = 0; i < 2 * MAX_INIT_CANDTS; i++)
974*c83a76b0SSuyog Pawar     {
975*c83a76b0SSuyog Pawar         search_node_t *ps_search_node;
976*c83a76b0SSuyog Pawar         ps_search_node = &ps_ctxt->s_init_search_node[i];
977*c83a76b0SSuyog Pawar         INIT_SEARCH_NODE(ps_search_node, 0);
978*c83a76b0SSuyog Pawar     }
979*c83a76b0SSuyog Pawar     for(i = 0; i < 3; i++)
980*c83a76b0SSuyog Pawar     {
981*c83a76b0SSuyog Pawar         search_node_t *ps_search_node;
982*c83a76b0SSuyog Pawar         ps_search_node = &as_left_neighbours[i];
983*c83a76b0SSuyog Pawar         INIT_SEARCH_NODE(ps_search_node, 0);
984*c83a76b0SSuyog Pawar         ps_search_node = &as_top_neighbours[i];
985*c83a76b0SSuyog Pawar         INIT_SEARCH_NODE(ps_search_node, 0);
986*c83a76b0SSuyog Pawar     }
987*c83a76b0SSuyog Pawar     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
988*c83a76b0SSuyog Pawar     /* Set up place holders to hold the search nodes of each initial candt */
989*c83a76b0SSuyog Pawar     for(i = 0; i < MAX_INIT_CANDTS; i++)
990*c83a76b0SSuyog Pawar     {
991*c83a76b0SSuyog Pawar         ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
992*c83a76b0SSuyog Pawar 
993*c83a76b0SSuyog Pawar         ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i];
994*c83a76b0SSuyog Pawar 
995*c83a76b0SSuyog Pawar         ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters;
996*c83a76b0SSuyog Pawar         ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters;
997*c83a76b0SSuyog Pawar     }
998*c83a76b0SSuyog Pawar 
999*c83a76b0SSuyog Pawar     /* For Top,TopLeft and Left cand., no need for refinement */
1000*c83a76b0SSuyog Pawar     id = 0;
1001*c83a76b0SSuyog Pawar     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
1002*c83a76b0SSuyog Pawar     {
1003*c83a76b0SSuyog Pawar         /* This search candt has the full search result */
1004*c83a76b0SSuyog Pawar         ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
1005*c83a76b0SSuyog Pawar         id++;
1006*c83a76b0SSuyog Pawar     }
1007*c83a76b0SSuyog Pawar 
1008*c83a76b0SSuyog Pawar     ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node;
1009*c83a76b0SSuyog Pawar     ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1010*c83a76b0SSuyog Pawar     id++;
1011*c83a76b0SSuyog Pawar     ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node;
1012*c83a76b0SSuyog Pawar     ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1013*c83a76b0SSuyog Pawar     id++;
1014*c83a76b0SSuyog Pawar     ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node;
1015*c83a76b0SSuyog Pawar     ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1016*c83a76b0SSuyog Pawar     id++;
1017*c83a76b0SSuyog Pawar     /* This search candt stores the global candt */
1018*c83a76b0SSuyog Pawar     global_id_8x4 = id;
1019*c83a76b0SSuyog Pawar     id++;
1020*c83a76b0SSuyog Pawar 
1021*c83a76b0SSuyog Pawar     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
1022*c83a76b0SSuyog Pawar     {
1023*c83a76b0SSuyog Pawar         /* This search candt has the full search result */
1024*c83a76b0SSuyog Pawar         ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
1025*c83a76b0SSuyog Pawar         id++;
1026*c83a76b0SSuyog Pawar     }
1027*c83a76b0SSuyog Pawar     /* Don't increment id as (0,0) is removed from cand. list. Initializing */
1028*c83a76b0SSuyog Pawar     /* the pointer for hme_init_pred_ctxt_no_encode()                       */
1029*c83a76b0SSuyog Pawar     ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node;
1030*c83a76b0SSuyog Pawar 
1031*c83a76b0SSuyog Pawar     /* For Top,TopLeft and Left cand., no need for refinement */
1032*c83a76b0SSuyog Pawar     id = 0;
1033*c83a76b0SSuyog Pawar     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
1034*c83a76b0SSuyog Pawar     {
1035*c83a76b0SSuyog Pawar         /* This search candt has the full search result */
1036*c83a76b0SSuyog Pawar         ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
1037*c83a76b0SSuyog Pawar         id++;
1038*c83a76b0SSuyog Pawar     }
1039*c83a76b0SSuyog Pawar 
1040*c83a76b0SSuyog Pawar     ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node;
1041*c83a76b0SSuyog Pawar     ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1042*c83a76b0SSuyog Pawar     id++;
1043*c83a76b0SSuyog Pawar     ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node;
1044*c83a76b0SSuyog Pawar     ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1045*c83a76b0SSuyog Pawar     id++;
1046*c83a76b0SSuyog Pawar     ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node;
1047*c83a76b0SSuyog Pawar     ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1048*c83a76b0SSuyog Pawar     id++;
1049*c83a76b0SSuyog Pawar     /* This search candt stores the global candt */
1050*c83a76b0SSuyog Pawar     global_id_4x8 = id;
1051*c83a76b0SSuyog Pawar     id++;
1052*c83a76b0SSuyog Pawar     if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
1053*c83a76b0SSuyog Pawar     {
1054*c83a76b0SSuyog Pawar         /* This search candt has the full search result */
1055*c83a76b0SSuyog Pawar         ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
1056*c83a76b0SSuyog Pawar         id++;
1057*c83a76b0SSuyog Pawar     }
1058*c83a76b0SSuyog Pawar     /* Don't increment id as (0,0) is removed from cand. list. Initializing */
1059*c83a76b0SSuyog Pawar     /* the pointer for hme_init_pred_ctxt_no_encode()                       */
1060*c83a76b0SSuyog Pawar     ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node;
1061*c83a76b0SSuyog Pawar 
1062*c83a76b0SSuyog Pawar     /* Zero mv always has 0 mvx and mvy components, ref idx initialized inside */
1063*c83a76b0SSuyog Pawar     ps_candt_zeromv_8x4->s_mv.i2_mvx = 0;
1064*c83a76b0SSuyog Pawar     ps_candt_zeromv_8x4->s_mv.i2_mvy = 0;
1065*c83a76b0SSuyog Pawar     ps_candt_zeromv_4x8->s_mv.i2_mvx = 0;
1066*c83a76b0SSuyog Pawar     ps_candt_zeromv_4x8->s_mv.i2_mvy = 0;
1067*c83a76b0SSuyog Pawar 
1068*c83a76b0SSuyog Pawar     /* SET UP THE PRED CTXT FOR L0 AND L1 */
1069*c83a76b0SSuyog Pawar     {
1070*c83a76b0SSuyog Pawar         S32 pred_lx;
1071*c83a76b0SSuyog Pawar 
1072*c83a76b0SSuyog Pawar         /* Bottom left always not available */
1073*c83a76b0SSuyog Pawar         as_left_neighbours[2].u1_is_avail = 0;
1074*c83a76b0SSuyog Pawar 
1075*c83a76b0SSuyog Pawar         for(pred_lx = 0; pred_lx < 2; pred_lx++)
1076*c83a76b0SSuyog Pawar         {
1077*c83a76b0SSuyog Pawar             pred_ctxt_t *ps_pred_ctxt;
1078*c83a76b0SSuyog Pawar 
1079*c83a76b0SSuyog Pawar             ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
1080*c83a76b0SSuyog Pawar             hme_init_pred_ctxt_no_encode(
1081*c83a76b0SSuyog Pawar                 ps_pred_ctxt,
1082*c83a76b0SSuyog Pawar                 ps_search_results,
1083*c83a76b0SSuyog Pawar                 as_top_neighbours,
1084*c83a76b0SSuyog Pawar                 as_left_neighbours,
1085*c83a76b0SSuyog Pawar                 NULL,
1086*c83a76b0SSuyog Pawar                 ps_candt_zeromv_8x4,
1087*c83a76b0SSuyog Pawar                 ps_candt_zeromv_8x4,
1088*c83a76b0SSuyog Pawar                 pred_lx,
1089*c83a76b0SSuyog Pawar                 lambda,
1090*c83a76b0SSuyog Pawar                 ps_coarse_prms->lambda_q_shift,
1091*c83a76b0SSuyog Pawar                 ps_ctxt->apu1_ref_bits_tlu_lc,
1092*c83a76b0SSuyog Pawar                 ps_ctxt->ai2_ref_scf);
1093*c83a76b0SSuyog Pawar         }
1094*c83a76b0SSuyog Pawar     }
1095*c83a76b0SSuyog Pawar 
1096*c83a76b0SSuyog Pawar     /*************************************************************************/
1097*c83a76b0SSuyog Pawar     /* Initialize the search parameters for search algo with the following   */
1098*c83a76b0SSuyog Pawar     /* parameters: No SATD, calculated number of initial candidates,         */
1099*c83a76b0SSuyog Pawar     /* No post refinement, initial step size and number of iterations as     */
1100*c83a76b0SSuyog Pawar     /* passed by the calling function.                                       */
1101*c83a76b0SSuyog Pawar     /* Also, we use input for this layer search, and not recon.              */
1102*c83a76b0SSuyog Pawar     /*************************************************************************/
1103*c83a76b0SSuyog Pawar     if(e_me_quality_preset == ME_XTREME_SPEED_25)
1104*c83a76b0SSuyog Pawar         s_search_prms_8x4.i4_num_init_candts = 1;
1105*c83a76b0SSuyog Pawar     else
1106*c83a76b0SSuyog Pawar         s_search_prms_8x4.i4_num_init_candts = id;
1107*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_use_satd = 0;
1108*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step;
1109*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_num_steps_post_refine = 0;
1110*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_use_rec = 0;
1111*c83a76b0SSuyog Pawar     s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4;
1112*c83a76b0SSuyog Pawar     s_search_prms_8x4.e_blk_size = BLK_8x4;
1113*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters;
1114*c83a76b0SSuyog Pawar     /* Coarse layer is always explicit */
1115*c83a76b0SSuyog Pawar     if(ME_MEDIUM_SPEED > e_me_quality_preset)
1116*c83a76b0SSuyog Pawar     {
1117*c83a76b0SSuyog Pawar         s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse;
1118*c83a76b0SSuyog Pawar     }
1119*c83a76b0SSuyog Pawar     else
1120*c83a76b0SSuyog Pawar     {
1121*c83a76b0SSuyog Pawar         s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
1122*c83a76b0SSuyog Pawar     }
1123*c83a76b0SSuyog Pawar 
1124*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_inp_stride = 8;
1125*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0;
1126*c83a76b0SSuyog Pawar     if(ps_coarse_prms->do_full_search)
1127*c83a76b0SSuyog Pawar         s_search_prms_8x4.i4_max_iters = 1;
1128*c83a76b0SSuyog Pawar     s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B);
1129*c83a76b0SSuyog Pawar     /* Using the member 0 to store for all ref. idx. */
1130*c83a76b0SSuyog Pawar     s_search_prms_8x4.aps_mv_range[0] = &s_range_prms;
1131*c83a76b0SSuyog Pawar     s_search_prms_8x4.ps_search_results = ps_search_results;
1132*c83a76b0SSuyog Pawar     s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step;
1133*c83a76b0SSuyog Pawar 
1134*c83a76b0SSuyog Pawar     s_search_prms_4x8 = s_search_prms_8x4;
1135*c83a76b0SSuyog Pawar     s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8;
1136*c83a76b0SSuyog Pawar     s_search_prms_4x8.e_blk_size = BLK_4x8;
1137*c83a76b0SSuyog Pawar     s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R);
1138*c83a76b0SSuyog Pawar 
1139*c83a76b0SSuyog Pawar     s_search_prms_4x4 = s_search_prms_8x4;
1140*c83a76b0SSuyog Pawar     /* Since s_search_prms_4x4 is used only to compute SAD at 4x4 level, search candidate is not used */
1141*c83a76b0SSuyog Pawar     s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8;
1142*c83a76b0SSuyog Pawar     s_search_prms_4x4.e_blk_size = BLK_4x4;
1143*c83a76b0SSuyog Pawar     s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N);
1144*c83a76b0SSuyog Pawar     /*************************************************************************/
1145*c83a76b0SSuyog Pawar     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
1146*c83a76b0SSuyog Pawar     /* every block given its coordinate.                                     */
1147*c83a76b0SSuyog Pawar     /*************************************************************************/
1148*c83a76b0SSuyog Pawar     SET_PIC_LIMIT(
1149*c83a76b0SSuyog Pawar         s_pic_limit,
1150*c83a76b0SSuyog Pawar         ps_curr_layer->i4_pad_x_inp,
1151*c83a76b0SSuyog Pawar         ps_curr_layer->i4_pad_y_inp,
1152*c83a76b0SSuyog Pawar         ps_curr_layer->i4_wd,
1153*c83a76b0SSuyog Pawar         ps_curr_layer->i4_ht,
1154*c83a76b0SSuyog Pawar         s_search_prms_4x4.i4_num_steps_post_refine);
1155*c83a76b0SSuyog Pawar 
1156*c83a76b0SSuyog Pawar     /* Pick the global mv from previous reference */
1157*c83a76b0SSuyog Pawar     for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1158*c83a76b0SSuyog Pawar     {
1159*c83a76b0SSuyog Pawar         if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1160*c83a76b0SSuyog Pawar         {
1161*c83a76b0SSuyog Pawar             /* Distance of current pic from reference */
1162*c83a76b0SSuyog Pawar             S32 i4_delta_poc;
1163*c83a76b0SSuyog Pawar 
1164*c83a76b0SSuyog Pawar             hme_mv_t s_mv;
1165*c83a76b0SSuyog Pawar             i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx];
1166*c83a76b0SSuyog Pawar 
1167*c83a76b0SSuyog Pawar             hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc);
1168*c83a76b0SSuyog Pawar 
1169*c83a76b0SSuyog Pawar             s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x;
1170*c83a76b0SSuyog Pawar             s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y;
1171*c83a76b0SSuyog Pawar             s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx;
1172*c83a76b0SSuyog Pawar 
1173*c83a76b0SSuyog Pawar             /*********************************************************************/
1174*c83a76b0SSuyog Pawar             /* Initialize the histogram for each reference index in current      */
1175*c83a76b0SSuyog Pawar             /* layer ctxt                                                        */
1176*c83a76b0SSuyog Pawar             /*********************************************************************/
1177*c83a76b0SSuyog Pawar             hme_init_histogram(
1178*c83a76b0SSuyog Pawar                 ps_ctxt->aps_mv_hist[i1_ref_idx],
1179*c83a76b0SSuyog Pawar                 (S32)as_mv_limit[i1_ref_idx].i2_max_x,
1180*c83a76b0SSuyog Pawar                 (S32)as_mv_limit[i1_ref_idx].i2_max_y);
1181*c83a76b0SSuyog Pawar         }
1182*c83a76b0SSuyog Pawar 
1183*c83a76b0SSuyog Pawar         /*********************************************************************/
1184*c83a76b0SSuyog Pawar         /* Initialize the dyn. search range params. for each reference index */
1185*c83a76b0SSuyog Pawar         /* in current layer ctxt                                             */
1186*c83a76b0SSuyog Pawar         /*********************************************************************/
1187*c83a76b0SSuyog Pawar         /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1188*c83a76b0SSuyog Pawar         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
1189*c83a76b0SSuyog Pawar         {
1190*c83a76b0SSuyog Pawar             INIT_DYN_SEARCH_PRMS(
1191*c83a76b0SSuyog Pawar                 &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx],
1192*c83a76b0SSuyog Pawar                 ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]);
1193*c83a76b0SSuyog Pawar         }
1194*c83a76b0SSuyog Pawar     }
1195*c83a76b0SSuyog Pawar 
1196*c83a76b0SSuyog Pawar     /*************************************************************************/
1197*c83a76b0SSuyog Pawar     /* if exhaustive algorithm then we use only 1 candt 0, 0                 */
1198*c83a76b0SSuyog Pawar     /* else we use a lot of causal and non causal candts                     */
1199*c83a76b0SSuyog Pawar     /* finally set number to the configured number of candts                 */
1200*c83a76b0SSuyog Pawar     /*************************************************************************/
1201*c83a76b0SSuyog Pawar 
1202*c83a76b0SSuyog Pawar     /* Loop in raster order over each 4x4 blk in a given row till end of frame */
1203*c83a76b0SSuyog Pawar     while(0 == end_of_frame)
1204*c83a76b0SSuyog Pawar     {
1205*c83a76b0SSuyog Pawar         job_queue_t *ps_job;
1206*c83a76b0SSuyog Pawar         void *pv_hme_dep_mngr;
1207*c83a76b0SSuyog Pawar         WORD32 offset_val, check_dep_pos, set_dep_pos;
1208*c83a76b0SSuyog Pawar 
1209*c83a76b0SSuyog Pawar         /* Get the current layer HME Dep Mngr       */
1210*c83a76b0SSuyog Pawar         /* Note : Use layer_id - 1 in HME layers    */
1211*c83a76b0SSuyog Pawar         pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1];
1212*c83a76b0SSuyog Pawar 
1213*c83a76b0SSuyog Pawar         /* Get the current row from the job queue */
1214*c83a76b0SSuyog Pawar         ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
1215*c83a76b0SSuyog Pawar             ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong);
1216*c83a76b0SSuyog Pawar 
1217*c83a76b0SSuyog Pawar         /* If all rows are done, set the end of process flag to 1, */
1218*c83a76b0SSuyog Pawar         /* and the current row to -1 */
1219*c83a76b0SSuyog Pawar         if(NULL == ps_job)
1220*c83a76b0SSuyog Pawar         {
1221*c83a76b0SSuyog Pawar             blk_y = -1;
1222*c83a76b0SSuyog Pawar             end_of_frame = 1;
1223*c83a76b0SSuyog Pawar         }
1224*c83a76b0SSuyog Pawar         else
1225*c83a76b0SSuyog Pawar         {
1226*c83a76b0SSuyog Pawar             ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type);
1227*c83a76b0SSuyog Pawar 
1228*c83a76b0SSuyog Pawar             /* Obtain the current row's details from the job */
1229*c83a76b0SSuyog Pawar             blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
1230*c83a76b0SSuyog Pawar 
1231*c83a76b0SSuyog Pawar             if(1 == ps_ctxt->s_frm_prms.is_i_pic)
1232*c83a76b0SSuyog Pawar             {
1233*c83a76b0SSuyog Pawar                 /* set the output dependency of current row */
1234*c83a76b0SSuyog Pawar                 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
1235*c83a76b0SSuyog Pawar                 continue;
1236*c83a76b0SSuyog Pawar             }
1237*c83a76b0SSuyog Pawar 
1238*c83a76b0SSuyog Pawar             /* Set Variables for Dep. Checking and Setting */
1239*c83a76b0SSuyog Pawar             set_dep_pos = blk_y + 1;
1240*c83a76b0SSuyog Pawar             if(blk_y > 0)
1241*c83a76b0SSuyog Pawar             {
1242*c83a76b0SSuyog Pawar                 offset_val = 2;
1243*c83a76b0SSuyog Pawar                 check_dep_pos = blk_y - 1;
1244*c83a76b0SSuyog Pawar             }
1245*c83a76b0SSuyog Pawar             else
1246*c83a76b0SSuyog Pawar             {
1247*c83a76b0SSuyog Pawar                 /* First row should run without waiting */
1248*c83a76b0SSuyog Pawar                 offset_val = -1;
1249*c83a76b0SSuyog Pawar                 check_dep_pos = 0;
1250*c83a76b0SSuyog Pawar             }
1251*c83a76b0SSuyog Pawar 
1252*c83a76b0SSuyog Pawar             /* Loop over all the blocks in current row */
1253*c83a76b0SSuyog Pawar             /* One block extra, since the last block in a row needs East block */
1254*c83a76b0SSuyog Pawar             for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++)
1255*c83a76b0SSuyog Pawar             {
1256*c83a76b0SSuyog Pawar                 /* Wait till top row block is processed   */
1257*c83a76b0SSuyog Pawar                 /* Currently checking till top right block*/
1258*c83a76b0SSuyog Pawar                 if(blk_x < (num_blks_in_row))
1259*c83a76b0SSuyog Pawar                 {
1260*c83a76b0SSuyog Pawar                     ihevce_dmgr_chk_row_row_sync(
1261*c83a76b0SSuyog Pawar                         pv_hme_dep_mngr,
1262*c83a76b0SSuyog Pawar                         blk_x,
1263*c83a76b0SSuyog Pawar                         offset_val,
1264*c83a76b0SSuyog Pawar                         check_dep_pos,
1265*c83a76b0SSuyog Pawar                         0, /* Col Tile No. : Not supported in PreEnc*/
1266*c83a76b0SSuyog Pawar                         ps_ctxt->thrd_id);
1267*c83a76b0SSuyog Pawar                 }
1268*c83a76b0SSuyog Pawar 
1269*c83a76b0SSuyog Pawar                 /***************************************************************/
1270*c83a76b0SSuyog Pawar                 /* Get Weighted input for all references                       */
1271*c83a76b0SSuyog Pawar                 /***************************************************************/
1272*c83a76b0SSuyog Pawar                 fp_get_wt_inp(
1273*c83a76b0SSuyog Pawar                     ps_curr_layer,
1274*c83a76b0SSuyog Pawar                     &ps_ctxt->s_wt_pred,
1275*c83a76b0SSuyog Pawar                     1 << (blk_size_shift + 1),
1276*c83a76b0SSuyog Pawar                     blk_x << blk_size_shift,
1277*c83a76b0SSuyog Pawar                     (blk_y - 1) << blk_size_shift,
1278*c83a76b0SSuyog Pawar                     1 << (blk_size_shift + 1),
1279*c83a76b0SSuyog Pawar                     i4_num_ref,
1280*c83a76b0SSuyog Pawar                     ps_ctxt->i4_wt_pred_enable_flag);
1281*c83a76b0SSuyog Pawar 
1282*c83a76b0SSuyog Pawar                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
1283*c83a76b0SSuyog Pawar                 hme_reset_search_results(
1284*c83a76b0SSuyog Pawar                     ps_search_results,
1285*c83a76b0SSuyog Pawar                     s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask,
1286*c83a76b0SSuyog Pawar                     MV_RES_FPEL);
1287*c83a76b0SSuyog Pawar 
1288*c83a76b0SSuyog Pawar                 /* Compute the search node offsets */
1289*c83a76b0SSuyog Pawar                 /* MAX is used to clip when left and top neighbours are not available at coarse boundaries */
1290*c83a76b0SSuyog Pawar                 search_node_top_offset =
1291*c83a76b0SSuyog Pawar                     blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row;
1292*c83a76b0SSuyog Pawar                 search_node_left_offset =
1293*c83a76b0SSuyog Pawar                     MAX((blk_x - 1), 0) +
1294*c83a76b0SSuyog Pawar                     ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row;
1295*c83a76b0SSuyog Pawar 
1296*c83a76b0SSuyog Pawar                 /* Input offset: wrt CU start. Offset for South block */
1297*c83a76b0SSuyog Pawar                 s_search_prms_4x4.i4_cu_x_off = 0;
1298*c83a76b0SSuyog Pawar                 s_search_prms_4x4.i4_cu_y_off = 4;
1299*c83a76b0SSuyog Pawar                 s_search_prms_4x4.i4_inp_stride = 8;
1300*c83a76b0SSuyog Pawar                 s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift;
1301*c83a76b0SSuyog Pawar                 s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift;
1302*c83a76b0SSuyog Pawar 
1303*c83a76b0SSuyog Pawar                 s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift;
1304*c83a76b0SSuyog Pawar                 s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1)
1305*c83a76b0SSuyog Pawar                                                                           << blk_size_shift;
1306*c83a76b0SSuyog Pawar 
1307*c83a76b0SSuyog Pawar                 /* This layer will always use explicit ME */
1308*c83a76b0SSuyog Pawar                 /* Loop across different Ref IDx */
1309*c83a76b0SSuyog Pawar                 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1310*c83a76b0SSuyog Pawar                 {
1311*c83a76b0SSuyog Pawar                     sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
1312*c83a76b0SSuyog Pawar                                      ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] *
1313*c83a76b0SSuyog Pawar                                          ai4_sad_4x4_block_stride[i1_ref_idx];
1314*c83a76b0SSuyog Pawar                     sad_current_offset =
1315*c83a76b0SSuyog Pawar                         (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
1316*c83a76b0SSuyog Pawar                         ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx];
1317*c83a76b0SSuyog Pawar 
1318*c83a76b0SSuyog Pawar                     /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */
1319*c83a76b0SSuyog Pawar                     if(0 == blk_x)
1320*c83a76b0SSuyog Pawar                         INIT_SEARCH_NODE(
1321*c83a76b0SSuyog Pawar                             &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x],
1322*c83a76b0SSuyog Pawar                             i1_ref_idx);
1323*c83a76b0SSuyog Pawar 
1324*c83a76b0SSuyog Pawar                     pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx];
1325*c83a76b0SSuyog Pawar 
1326*c83a76b0SSuyog Pawar                     /* Initialize changing params here */
1327*c83a76b0SSuyog Pawar                     s_search_prms_8x4.i1_ref_idx = i1_ref_idx;
1328*c83a76b0SSuyog Pawar                     s_search_prms_4x8.i1_ref_idx = i1_ref_idx;
1329*c83a76b0SSuyog Pawar                     s_search_prms_4x4.i1_ref_idx = i1_ref_idx;
1330*c83a76b0SSuyog Pawar 
1331*c83a76b0SSuyog Pawar                     if(num_blks_in_row == blk_x)
1332*c83a76b0SSuyog Pawar                     {
1333*c83a76b0SSuyog Pawar                         S16 *pi2_sads_4x4_current;
1334*c83a76b0SSuyog Pawar                         /* The current 4x4 block will be in a padded region, so it may not match any of the references */
1335*c83a76b0SSuyog Pawar                         pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1336*c83a76b0SSuyog Pawar 
1337*c83a76b0SSuyog Pawar                         memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
1338*c83a76b0SSuyog Pawar                     }
1339*c83a76b0SSuyog Pawar 
1340*c83a76b0SSuyog Pawar                     /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/
1341*c83a76b0SSuyog Pawar                     if((0 == blk_y) || (num_blks_in_row == blk_x))
1342*c83a76b0SSuyog Pawar                     {
1343*c83a76b0SSuyog Pawar                         S16 *pi2_sads_4x4_current;
1344*c83a76b0SSuyog Pawar                         /* Compute 4x4 SADs for current block */
1345*c83a76b0SSuyog Pawar                         /* Pointer to store SADs */
1346*c83a76b0SSuyog Pawar                         pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1347*c83a76b0SSuyog Pawar 
1348*c83a76b0SSuyog Pawar                         hme_derive_worst_case_search_range(
1349*c83a76b0SSuyog Pawar                             &s_range_prms,
1350*c83a76b0SSuyog Pawar                             &s_pic_limit,
1351*c83a76b0SSuyog Pawar                             &as_mv_limit[i1_ref_idx],
1352*c83a76b0SSuyog Pawar                             blk_x << blk_size_shift,
1353*c83a76b0SSuyog Pawar                             blk_y << blk_size_shift,
1354*c83a76b0SSuyog Pawar                             blk_wd,
1355*c83a76b0SSuyog Pawar                             blk_ht);
1356*c83a76b0SSuyog Pawar 
1357*c83a76b0SSuyog Pawar                         if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1358*c83a76b0SSuyog Pawar                         {
1359*c83a76b0SSuyog Pawar                             ((ihevce_me_optimised_function_list_t *)
1360*c83a76b0SSuyog Pawar                                  ps_ctxt->pv_me_optimised_function_list)
1361*c83a76b0SSuyog Pawar                                 ->pf_store_4x4_sads_high_quality(
1362*c83a76b0SSuyog Pawar                                     &s_search_prms_4x4,
1363*c83a76b0SSuyog Pawar                                     ps_curr_layer,
1364*c83a76b0SSuyog Pawar                                     &as_mv_limit[i1_ref_idx],
1365*c83a76b0SSuyog Pawar                                     &ps_ctxt->s_wt_pred,
1366*c83a76b0SSuyog Pawar                                     pi2_sads_4x4_current);
1367*c83a76b0SSuyog Pawar                         }
1368*c83a76b0SSuyog Pawar                         else
1369*c83a76b0SSuyog Pawar                         {
1370*c83a76b0SSuyog Pawar                             ((ihevce_me_optimised_function_list_t *)
1371*c83a76b0SSuyog Pawar                                  ps_ctxt->pv_me_optimised_function_list)
1372*c83a76b0SSuyog Pawar                                 ->pf_store_4x4_sads_high_speed(
1373*c83a76b0SSuyog Pawar                                     &s_search_prms_4x4,
1374*c83a76b0SSuyog Pawar                                     ps_curr_layer,
1375*c83a76b0SSuyog Pawar                                     &as_mv_limit[i1_ref_idx],
1376*c83a76b0SSuyog Pawar                                     &ps_ctxt->s_wt_pred,
1377*c83a76b0SSuyog Pawar                                     pi2_sads_4x4_current);
1378*c83a76b0SSuyog Pawar                         }
1379*c83a76b0SSuyog Pawar                     }
1380*c83a76b0SSuyog Pawar                     else
1381*c83a76b0SSuyog Pawar                     {
1382*c83a76b0SSuyog Pawar                         /* For the zero mv candt, the ref idx to be modified */
1383*c83a76b0SSuyog Pawar                         ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx;
1384*c83a76b0SSuyog Pawar                         ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx;
1385*c83a76b0SSuyog Pawar 
1386*c83a76b0SSuyog Pawar                         if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1387*c83a76b0SSuyog Pawar                         {
1388*c83a76b0SSuyog Pawar                             /* For the global mvs alone, the search node points to a local variable */
1389*c83a76b0SSuyog Pawar                             ps_search_candts_8x4[global_id_8x4].ps_search_node =
1390*c83a76b0SSuyog Pawar                                 &s_candt_global[i1_ref_idx];
1391*c83a76b0SSuyog Pawar                             ps_search_candts_4x8[global_id_4x8].ps_search_node =
1392*c83a76b0SSuyog Pawar                                 &s_candt_global[i1_ref_idx];
1393*c83a76b0SSuyog Pawar                         }
1394*c83a76b0SSuyog Pawar 
1395*c83a76b0SSuyog Pawar                         hme_get_spatial_candt(
1396*c83a76b0SSuyog Pawar                             ps_curr_layer,
1397*c83a76b0SSuyog Pawar                             BLK_4x4,
1398*c83a76b0SSuyog Pawar                             blk_x,
1399*c83a76b0SSuyog Pawar                             blk_y - 1,
1400*c83a76b0SSuyog Pawar                             i1_ref_idx,
1401*c83a76b0SSuyog Pawar                             as_top_neighbours,
1402*c83a76b0SSuyog Pawar                             as_left_neighbours,
1403*c83a76b0SSuyog Pawar                             0,
1404*c83a76b0SSuyog Pawar                             1,
1405*c83a76b0SSuyog Pawar                             0,
1406*c83a76b0SSuyog Pawar                             0);
1407*c83a76b0SSuyog Pawar                         /* set up the various candts */
1408*c83a76b0SSuyog Pawar                         *ps_candt_4x8_l = as_left_neighbours[0];
1409*c83a76b0SSuyog Pawar                         *ps_candt_4x8_t = as_top_neighbours[1];
1410*c83a76b0SSuyog Pawar                         *ps_candt_4x8_tl = as_top_neighbours[0];
1411*c83a76b0SSuyog Pawar                         *ps_candt_8x4_l = *ps_candt_4x8_l;
1412*c83a76b0SSuyog Pawar                         *ps_candt_8x4_tl = *ps_candt_4x8_tl;
1413*c83a76b0SSuyog Pawar                         *ps_candt_8x4_t = *ps_candt_4x8_t;
1414*c83a76b0SSuyog Pawar 
1415*c83a76b0SSuyog Pawar                         {
1416*c83a76b0SSuyog Pawar                             S32 pred_lx;
1417*c83a76b0SSuyog Pawar                             S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top;
1418*c83a76b0SSuyog Pawar                             pred_ctxt_t *ps_pred_ctxt;
1419*c83a76b0SSuyog Pawar                             PF_MV_COST_FXN pf_mv_cost_compute;
1420*c83a76b0SSuyog Pawar 
1421*c83a76b0SSuyog Pawar                             /* Compute 4x4 SADs for current block */
1422*c83a76b0SSuyog Pawar                             /* Pointer to store SADs */
1423*c83a76b0SSuyog Pawar                             pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1424*c83a76b0SSuyog Pawar 
1425*c83a76b0SSuyog Pawar                             hme_derive_worst_case_search_range(
1426*c83a76b0SSuyog Pawar                                 &s_range_prms,
1427*c83a76b0SSuyog Pawar                                 &s_pic_limit,
1428*c83a76b0SSuyog Pawar                                 &as_mv_limit[i1_ref_idx],
1429*c83a76b0SSuyog Pawar                                 blk_x << blk_size_shift,
1430*c83a76b0SSuyog Pawar                                 blk_y << blk_size_shift,
1431*c83a76b0SSuyog Pawar                                 blk_wd,
1432*c83a76b0SSuyog Pawar                                 blk_ht);
1433*c83a76b0SSuyog Pawar                             if(i4_pic_ht == blk_y)
1434*c83a76b0SSuyog Pawar                             {
1435*c83a76b0SSuyog Pawar                                 memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
1436*c83a76b0SSuyog Pawar                             }
1437*c83a76b0SSuyog Pawar                             else
1438*c83a76b0SSuyog Pawar                             {
1439*c83a76b0SSuyog Pawar                                 if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1440*c83a76b0SSuyog Pawar                                 {
1441*c83a76b0SSuyog Pawar                                     ((ihevce_me_optimised_function_list_t *)
1442*c83a76b0SSuyog Pawar                                          ps_ctxt->pv_me_optimised_function_list)
1443*c83a76b0SSuyog Pawar                                         ->pf_store_4x4_sads_high_quality(
1444*c83a76b0SSuyog Pawar                                             &s_search_prms_4x4,
1445*c83a76b0SSuyog Pawar                                             ps_curr_layer,
1446*c83a76b0SSuyog Pawar                                             &as_mv_limit[i1_ref_idx],
1447*c83a76b0SSuyog Pawar                                             &ps_ctxt->s_wt_pred,
1448*c83a76b0SSuyog Pawar                                             pi2_sads_4x4_current);
1449*c83a76b0SSuyog Pawar                                 }
1450*c83a76b0SSuyog Pawar                                 else
1451*c83a76b0SSuyog Pawar                                 {
1452*c83a76b0SSuyog Pawar                                     ((ihevce_me_optimised_function_list_t *)
1453*c83a76b0SSuyog Pawar                                          ps_ctxt->pv_me_optimised_function_list)
1454*c83a76b0SSuyog Pawar                                         ->pf_store_4x4_sads_high_speed(
1455*c83a76b0SSuyog Pawar                                             &s_search_prms_4x4,
1456*c83a76b0SSuyog Pawar                                             ps_curr_layer,
1457*c83a76b0SSuyog Pawar                                             &as_mv_limit[i1_ref_idx],
1458*c83a76b0SSuyog Pawar                                             &ps_ctxt->s_wt_pred,
1459*c83a76b0SSuyog Pawar                                             pi2_sads_4x4_current);
1460*c83a76b0SSuyog Pawar                                 }
1461*c83a76b0SSuyog Pawar                             }
1462*c83a76b0SSuyog Pawar                             /* Set pred direction to L0 or L1 */
1463*c83a76b0SSuyog Pawar                             pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx];
1464*c83a76b0SSuyog Pawar 
1465*c83a76b0SSuyog Pawar                             /* Suitable context (L0 or L1) */
1466*c83a76b0SSuyog Pawar                             ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
1467*c83a76b0SSuyog Pawar 
1468*c83a76b0SSuyog Pawar                             /* Coarse layer is always explicit */
1469*c83a76b0SSuyog Pawar                             if(ME_PRISTINE_QUALITY > e_me_quality_preset)
1470*c83a76b0SSuyog Pawar                             {
1471*c83a76b0SSuyog Pawar                                 pf_mv_cost_compute = compute_mv_cost_coarse;
1472*c83a76b0SSuyog Pawar                             }
1473*c83a76b0SSuyog Pawar                             else
1474*c83a76b0SSuyog Pawar                             {
1475*c83a76b0SSuyog Pawar                                 /* Cost function is not called in high speed case. Below one is just a dummy function */
1476*c83a76b0SSuyog Pawar                                 pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
1477*c83a76b0SSuyog Pawar                             }
1478*c83a76b0SSuyog Pawar 
1479*c83a76b0SSuyog Pawar                             /*********************************************************************/
1480*c83a76b0SSuyog Pawar                             /* Now, compute the mv for the top block                             */
1481*c83a76b0SSuyog Pawar                             /*********************************************************************/
1482*c83a76b0SSuyog Pawar                             pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset;
1483*c83a76b0SSuyog Pawar 
1484*c83a76b0SSuyog Pawar                             /*********************************************************************/
1485*c83a76b0SSuyog Pawar                             /* For every blk in the picture, the search range needs to be derived*/
1486*c83a76b0SSuyog Pawar                             /* Any blk can have any mv, but practical search constraints are     */
1487*c83a76b0SSuyog Pawar                             /* imposed by the picture boundary and amt of padding.               */
1488*c83a76b0SSuyog Pawar                             /*********************************************************************/
1489*c83a76b0SSuyog Pawar                             hme_derive_search_range(
1490*c83a76b0SSuyog Pawar                                 &s_range_prms,
1491*c83a76b0SSuyog Pawar                                 &s_pic_limit,
1492*c83a76b0SSuyog Pawar                                 &as_mv_limit[i1_ref_idx],
1493*c83a76b0SSuyog Pawar                                 blk_x << blk_size_shift,
1494*c83a76b0SSuyog Pawar                                 (blk_y - 1) << blk_size_shift,
1495*c83a76b0SSuyog Pawar                                 blk_wd,
1496*c83a76b0SSuyog Pawar                                 blk_ht);
1497*c83a76b0SSuyog Pawar 
1498*c83a76b0SSuyog Pawar                             /* Compute the mv for the top block */
1499*c83a76b0SSuyog Pawar                             if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1500*c83a76b0SSuyog Pawar                             {
1501*c83a76b0SSuyog Pawar                                 ((ihevce_me_optimised_function_list_t *)
1502*c83a76b0SSuyog Pawar                                      ps_ctxt->pv_me_optimised_function_list)
1503*c83a76b0SSuyog Pawar                                     ->pf_combine_4x4_sads_and_compute_cost_high_quality(
1504*c83a76b0SSuyog Pawar                                         i1_ref_idx,
1505*c83a76b0SSuyog Pawar                                         &s_range_prms, /* Both 4x8 and 8x4 has same search range */
1506*c83a76b0SSuyog Pawar                                         &as_mv_limit[i1_ref_idx],
1507*c83a76b0SSuyog Pawar                                         &best_mv_4x8,
1508*c83a76b0SSuyog Pawar                                         &best_mv_8x4,
1509*c83a76b0SSuyog Pawar                                         ps_pred_ctxt,
1510*c83a76b0SSuyog Pawar                                         pf_mv_cost_compute,
1511*c83a76b0SSuyog Pawar                                         pi2_sads_4x4_top, /* Current SAD block */
1512*c83a76b0SSuyog Pawar                                         (pi2_sads_4x4_top +
1513*c83a76b0SSuyog Pawar                                          ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
1514*c83a76b0SSuyog Pawar                                         pi2_sads_4x4_current); /* South SAD block */
1515*c83a76b0SSuyog Pawar                             }
1516*c83a76b0SSuyog Pawar                             else
1517*c83a76b0SSuyog Pawar                             {
1518*c83a76b0SSuyog Pawar                                 ((ihevce_me_optimised_function_list_t *)
1519*c83a76b0SSuyog Pawar                                      ps_ctxt->pv_me_optimised_function_list)
1520*c83a76b0SSuyog Pawar                                     ->pf_combine_4x4_sads_and_compute_cost_high_speed(
1521*c83a76b0SSuyog Pawar                                         i1_ref_idx,
1522*c83a76b0SSuyog Pawar                                         &s_range_prms, /* Both 4x8 and 8x4 has same search range */
1523*c83a76b0SSuyog Pawar                                         &as_mv_limit[i1_ref_idx],
1524*c83a76b0SSuyog Pawar                                         &best_mv_4x8,
1525*c83a76b0SSuyog Pawar                                         &best_mv_8x4,
1526*c83a76b0SSuyog Pawar                                         ps_pred_ctxt,
1527*c83a76b0SSuyog Pawar                                         pf_mv_cost_compute,
1528*c83a76b0SSuyog Pawar                                         pi2_sads_4x4_top, /* Current SAD block */
1529*c83a76b0SSuyog Pawar                                         (pi2_sads_4x4_top +
1530*c83a76b0SSuyog Pawar                                          ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
1531*c83a76b0SSuyog Pawar                                         pi2_sads_4x4_current); /* South SAD block */
1532*c83a76b0SSuyog Pawar                             }
1533*c83a76b0SSuyog Pawar 
1534*c83a76b0SSuyog Pawar                             ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x;
1535*c83a76b0SSuyog Pawar                             ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y;
1536*c83a76b0SSuyog Pawar                             ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx;
1537*c83a76b0SSuyog Pawar 
1538*c83a76b0SSuyog Pawar                             ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x;
1539*c83a76b0SSuyog Pawar                             ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y;
1540*c83a76b0SSuyog Pawar                             ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx;
1541*c83a76b0SSuyog Pawar                         }
1542*c83a76b0SSuyog Pawar 
1543*c83a76b0SSuyog Pawar                         /* call the appropriate Search Algo for 4x8S. The 4x8N would  */
1544*c83a76b0SSuyog Pawar                         /* have already been called by top block */
1545*c83a76b0SSuyog Pawar                         hme_pred_search_square_stepn(
1546*c83a76b0SSuyog Pawar                             &s_search_prms_8x4,
1547*c83a76b0SSuyog Pawar                             ps_curr_layer,
1548*c83a76b0SSuyog Pawar                             &ps_ctxt->s_wt_pred,
1549*c83a76b0SSuyog Pawar                             e_me_quality_preset,
1550*c83a76b0SSuyog Pawar                             (ihevce_me_optimised_function_list_t *)
1551*c83a76b0SSuyog Pawar                                 ps_ctxt->pv_me_optimised_function_list
1552*c83a76b0SSuyog Pawar 
1553*c83a76b0SSuyog Pawar                         );
1554*c83a76b0SSuyog Pawar 
1555*c83a76b0SSuyog Pawar                         /* Call the appropriate search algo for 8x4E */
1556*c83a76b0SSuyog Pawar                         hme_pred_search_square_stepn(
1557*c83a76b0SSuyog Pawar                             &s_search_prms_4x8,
1558*c83a76b0SSuyog Pawar                             ps_curr_layer,
1559*c83a76b0SSuyog Pawar                             &ps_ctxt->s_wt_pred,
1560*c83a76b0SSuyog Pawar                             e_me_quality_preset,
1561*c83a76b0SSuyog Pawar                             (ihevce_me_optimised_function_list_t *)
1562*c83a76b0SSuyog Pawar                                 ps_ctxt->pv_me_optimised_function_list);
1563*c83a76b0SSuyog Pawar 
1564*c83a76b0SSuyog Pawar                         if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1565*c83a76b0SSuyog Pawar                         {
1566*c83a76b0SSuyog Pawar                             /* Histogram updates across different Ref ID for global MV */
1567*c83a76b0SSuyog Pawar                             hme_update_histogram(
1568*c83a76b0SSuyog Pawar                                 ps_ctxt->aps_mv_hist[i1_ref_idx],
1569*c83a76b0SSuyog Pawar                                 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx,
1570*c83a76b0SSuyog Pawar                                 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy);
1571*c83a76b0SSuyog Pawar                             hme_update_histogram(
1572*c83a76b0SSuyog Pawar                                 ps_ctxt->aps_mv_hist[i1_ref_idx],
1573*c83a76b0SSuyog Pawar                                 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx,
1574*c83a76b0SSuyog Pawar                                 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy);
1575*c83a76b0SSuyog Pawar                         }
1576*c83a76b0SSuyog Pawar 
1577*c83a76b0SSuyog Pawar                         /* update the best results to the mv bank */
1578*c83a76b0SSuyog Pawar                         hme_update_mv_bank_coarse(
1579*c83a76b0SSuyog Pawar                             ps_search_results,
1580*c83a76b0SSuyog Pawar                             ps_curr_layer->ps_layer_mvbank,
1581*c83a76b0SSuyog Pawar                             blk_x,
1582*c83a76b0SSuyog Pawar                             (blk_y - 1),
1583*c83a76b0SSuyog Pawar                             ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
1584*c83a76b0SSuyog Pawar                                 search_node_top_offset, /* Top Candidate */
1585*c83a76b0SSuyog Pawar                             ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
1586*c83a76b0SSuyog Pawar                                 search_node_left_offset, /* Left candidate */
1587*c83a76b0SSuyog Pawar                             i1_ref_idx,
1588*c83a76b0SSuyog Pawar                             &s_mv_update_prms);
1589*c83a76b0SSuyog Pawar 
1590*c83a76b0SSuyog Pawar                         /* Copy the best search result to 5 row array for future use */
1591*c83a76b0SSuyog Pawar                         *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x +
1592*c83a76b0SSuyog Pawar                           ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
1593*c83a76b0SSuyog Pawar                             *(aps_best_search_node_4x8[i1_ref_idx]);
1594*c83a76b0SSuyog Pawar 
1595*c83a76b0SSuyog Pawar                         *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x +
1596*c83a76b0SSuyog Pawar                           ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
1597*c83a76b0SSuyog Pawar                             *(aps_best_search_node_8x4[i1_ref_idx]);
1598*c83a76b0SSuyog Pawar 
1599*c83a76b0SSuyog Pawar                         /* Update the min and max MVs of the dynamic search range for each ref. pic. */
1600*c83a76b0SSuyog Pawar                         /* Only for P pics: both flags are 0 for P; for I and B they are mutually exclusive */
1601*c83a76b0SSuyog Pawar                         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
1602*c83a76b0SSuyog Pawar                         {
1603*c83a76b0SSuyog Pawar                             WORD32 num_mvs, i, j;
1604*c83a76b0SSuyog Pawar                             search_node_t *aps_search_nodes[4];
1605*c83a76b0SSuyog Pawar                             /* Best results for 8x4R and 4x8B blocks */
1606*c83a76b0SSuyog Pawar                             search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
1607*c83a76b0SSuyog Pawar 
1608*c83a76b0SSuyog Pawar                             num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
1609*c83a76b0SSuyog Pawar 
1610*c83a76b0SSuyog Pawar                             /*************************************************************************/
1611*c83a76b0SSuyog Pawar                             /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
1612*c83a76b0SSuyog Pawar                             /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp.      */
1613*c83a76b0SSuyog Pawar                             /* If number of results to be stored is 4, then we store all these 4     */
1614*c83a76b0SSuyog Pawar                             /* results, else we pick best ones                                       */
1615*c83a76b0SSuyog Pawar                             /*************************************************************************/
1616*c83a76b0SSuyog Pawar                             ps_search_node_8x4_r =
1617*c83a76b0SSuyog Pawar                                 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
1618*c83a76b0SSuyog Pawar                             ps_search_node_4x8_b =
1619*c83a76b0SSuyog Pawar                                 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
1620*c83a76b0SSuyog Pawar 
1621*c83a76b0SSuyog Pawar                             ASSERT(num_mvs <= 4);
1622*c83a76b0SSuyog Pawar 
1623*c83a76b0SSuyog Pawar                             /* Doing this to sort best results */
1624*c83a76b0SSuyog Pawar                             aps_search_nodes[0] = ps_search_node_8x4_r;
1625*c83a76b0SSuyog Pawar                             aps_search_nodes[1] = ps_search_node_4x8_b;
1626*c83a76b0SSuyog Pawar                             aps_search_nodes[2] =
1627*c83a76b0SSuyog Pawar                                 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
1628*c83a76b0SSuyog Pawar                                 search_node_left_offset; /* Left candidate */
1629*c83a76b0SSuyog Pawar                             aps_search_nodes[3] =
1630*c83a76b0SSuyog Pawar                                 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
1631*c83a76b0SSuyog Pawar                                 search_node_top_offset; /* Top Candidate */
1632*c83a76b0SSuyog Pawar 
1633*c83a76b0SSuyog Pawar                             /* Note : Need to be resolved!!! */
1634*c83a76b0SSuyog Pawar                             /* Added this to match with "hme_update_mv_bank_coarse" */
1635*c83a76b0SSuyog Pawar                             if(num_mvs != 4)
1636*c83a76b0SSuyog Pawar                             {
1637*c83a76b0SSuyog Pawar                                 /* Run through the results, store them in best to worst order */
1638*c83a76b0SSuyog Pawar                                 for(i = 0; i < num_mvs; i++)
1639*c83a76b0SSuyog Pawar                                 {
1640*c83a76b0SSuyog Pawar                                     for(j = i + 1; j < 4; j++)
1641*c83a76b0SSuyog Pawar                                     {
1642*c83a76b0SSuyog Pawar                                         if(aps_search_nodes[j]->i4_tot_cost <
1643*c83a76b0SSuyog Pawar                                            aps_search_nodes[i]->i4_tot_cost)
1644*c83a76b0SSuyog Pawar                                         {
1645*c83a76b0SSuyog Pawar                                             SWAP_HME(
1646*c83a76b0SSuyog Pawar                                                 aps_search_nodes[j],
1647*c83a76b0SSuyog Pawar                                                 aps_search_nodes[i],
1648*c83a76b0SSuyog Pawar                                                 search_node_t *);
1649*c83a76b0SSuyog Pawar                                         }
1650*c83a76b0SSuyog Pawar                                     }
1651*c83a76b0SSuyog Pawar                                 }
1652*c83a76b0SSuyog Pawar                             }
1653*c83a76b0SSuyog Pawar 
1654*c83a76b0SSuyog Pawar                             /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
1655*c83a76b0SSuyog Pawar                             for(i = 0; i < num_mvs; i++)
1656*c83a76b0SSuyog Pawar                             {
1657*c83a76b0SSuyog Pawar                                 hme_update_dynamic_search_params(
1658*c83a76b0SSuyog Pawar                                     &ps_ctxt->s_coarse_dyn_range_prms
1659*c83a76b0SSuyog Pawar                                          .as_dyn_range_prms[i4_layer_id][i1_ref_idx],
1660*c83a76b0SSuyog Pawar                                     aps_search_nodes[i]->s_mv.i2_mvy);
1661*c83a76b0SSuyog Pawar                             }
1662*c83a76b0SSuyog Pawar                         }
1663*c83a76b0SSuyog Pawar                     }
1664*c83a76b0SSuyog Pawar                 }
1665*c83a76b0SSuyog Pawar 
1666*c83a76b0SSuyog Pawar                 /* Update the number of blocks processed in the current row */
1667*c83a76b0SSuyog Pawar                 ihevce_dmgr_set_row_row_sync(
1668*c83a76b0SSuyog Pawar                     pv_hme_dep_mngr,
1669*c83a76b0SSuyog Pawar                     (blk_x + 1),
1670*c83a76b0SSuyog Pawar                     blk_y,
1671*c83a76b0SSuyog Pawar                     0 /* Col Tile No. : Not supported in PreEnc*/);
1672*c83a76b0SSuyog Pawar             }
1673*c83a76b0SSuyog Pawar 
1674*c83a76b0SSuyog Pawar             /* set the output dependency after completion of row */
1675*c83a76b0SSuyog Pawar             ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
1676*c83a76b0SSuyog Pawar         }
1677*c83a76b0SSuyog Pawar     }
1678*c83a76b0SSuyog Pawar 
1679*c83a76b0SSuyog Pawar     return;
1680*c83a76b0SSuyog Pawar }
1681