xref: /aosp_15_r20/external/libhevc/encoder/hme_search_algo.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar /**
21*c83a76b0SSuyog Pawar ******************************************************************************
22*c83a76b0SSuyog Pawar * @file hme_search_algo.c
23*c83a76b0SSuyog Pawar *
24*c83a76b0SSuyog Pawar * @brief
25*c83a76b0SSuyog Pawar *    Contains various search algorithms to be used by coarse/refinement layers
26*c83a76b0SSuyog Pawar *
27*c83a76b0SSuyog Pawar * @author
28*c83a76b0SSuyog Pawar *    Ittiam
29*c83a76b0SSuyog Pawar *
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar * List of Functions
32*c83a76b0SSuyog Pawar * hme_compute_grid_results_step_gt_1()
33*c83a76b0SSuyog Pawar * hme_compute_grid_results_step_1()
34*c83a76b0SSuyog Pawar * hme_pred_search_square_stepn()
35*c83a76b0SSuyog Pawar *
36*c83a76b0SSuyog Pawar ******************************************************************************
37*c83a76b0SSuyog Pawar */
38*c83a76b0SSuyog Pawar 
39*c83a76b0SSuyog Pawar /*****************************************************************************/
40*c83a76b0SSuyog Pawar /* File Includes                                                             */
41*c83a76b0SSuyog Pawar /*****************************************************************************/
42*c83a76b0SSuyog Pawar /* System include files */
43*c83a76b0SSuyog Pawar #include <stdio.h>
44*c83a76b0SSuyog Pawar #include <string.h>
45*c83a76b0SSuyog Pawar #include <stdlib.h>
46*c83a76b0SSuyog Pawar #include <assert.h>
47*c83a76b0SSuyog Pawar #include <stdarg.h>
48*c83a76b0SSuyog Pawar #include <math.h>
49*c83a76b0SSuyog Pawar #include <limits.h>
50*c83a76b0SSuyog Pawar 
51*c83a76b0SSuyog Pawar /* User include files */
52*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
53*c83a76b0SSuyog Pawar #include "itt_video_api.h"
54*c83a76b0SSuyog Pawar #include "ihevce_api.h"
55*c83a76b0SSuyog Pawar 
56*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
57*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
58*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
59*c83a76b0SSuyog Pawar 
60*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
61*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
62*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
63*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
64*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
65*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
66*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
67*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
69*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
70*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
71*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
72*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
73*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
74*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
75*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
76*c83a76b0SSuyog Pawar 
77*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
78*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
79*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
80*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
82*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
83*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
84*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
85*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
86*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
87*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
88*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
89*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
90*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
91*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
92*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
93*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
94*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
95*c83a76b0SSuyog Pawar #include "hme_datatype.h"
96*c83a76b0SSuyog Pawar #include "hme_interface.h"
97*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
98*c83a76b0SSuyog Pawar #include "hme_defs.h"
99*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
100*c83a76b0SSuyog Pawar #include "hme_globals.h"
101*c83a76b0SSuyog Pawar #include "hme_utils.h"
102*c83a76b0SSuyog Pawar #include "hme_coarse.h"
103*c83a76b0SSuyog Pawar #include "hme_fullpel.h"
104*c83a76b0SSuyog Pawar #include "hme_subpel.h"
105*c83a76b0SSuyog Pawar #include "hme_refine.h"
106*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
107*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
108*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
109*c83a76b0SSuyog Pawar #include "ihevce_stasino_helpers.h"
110*c83a76b0SSuyog Pawar #include "ihevce_common_utils.h"
111*c83a76b0SSuyog Pawar 
112*c83a76b0SSuyog Pawar /*****************************************************************************/
113*c83a76b0SSuyog Pawar /* Function Definitions                                                      */
114*c83a76b0SSuyog Pawar /*****************************************************************************/
115*c83a76b0SSuyog Pawar 
116*c83a76b0SSuyog Pawar /**
117*c83a76b0SSuyog Pawar ********************************************************************************
118*c83a76b0SSuyog Pawar *  @fn     void hme_compute_grid_results_step_1(err_prms_t *ps_err_prms,
119*c83a76b0SSuyog Pawar result_upd_prms_t *ps_result_prms,
120*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size)
121*c83a76b0SSuyog Pawar *
122*c83a76b0SSuyog Pawar *  @brief  Updates results for a grid of step = 1
123*c83a76b0SSuyog Pawar *
124*c83a76b0SSuyog Pawar *  @param[in] ps_err_prms: Various parameters to this function
125*c83a76b0SSuyog Pawar *
126*c83a76b0SSuyog Pawar *  @param[in] ps_result_prms : Parameters pertaining to result updation
127*c83a76b0SSuyog Pawar *
128*c83a76b0SSuyog Pawar *  @param[out] e_blk_size: Block size of the blk being searched for
129*c83a76b0SSuyog Pawar *
130*c83a76b0SSuyog Pawar *  @return none
131*c83a76b0SSuyog Pawar ********************************************************************************
132*c83a76b0SSuyog Pawar */
hme_compute_grid_results(err_prms_t * ps_err_prms,result_upd_prms_t * ps_result_prms,BLK_SIZE_T e_blk_size)133*c83a76b0SSuyog Pawar void hme_compute_grid_results(
134*c83a76b0SSuyog Pawar     err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size)
135*c83a76b0SSuyog Pawar {
136*c83a76b0SSuyog Pawar     PF_RESULT_FXN_T pf_hme_result_fxn;
137*c83a76b0SSuyog Pawar     PF_SAD_FXN_T pf_sad_fxn;
138*c83a76b0SSuyog Pawar     S32 i4_num_results;
139*c83a76b0SSuyog Pawar     S32 part_id;
140*c83a76b0SSuyog Pawar 
141*c83a76b0SSuyog Pawar     part_id = ps_result_prms->pi4_valid_part_ids[0];
142*c83a76b0SSuyog Pawar 
143*c83a76b0SSuyog Pawar     i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part;
144*c83a76b0SSuyog Pawar 
145*c83a76b0SSuyog Pawar     pf_sad_fxn = hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask);
146*c83a76b0SSuyog Pawar 
147*c83a76b0SSuyog Pawar     pf_hme_result_fxn =
148*c83a76b0SSuyog Pawar         hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results);
149*c83a76b0SSuyog Pawar 
150*c83a76b0SSuyog Pawar     pf_sad_fxn(ps_err_prms);
151*c83a76b0SSuyog Pawar     pf_hme_result_fxn(ps_result_prms);
152*c83a76b0SSuyog Pawar }
153*c83a76b0SSuyog Pawar 
154*c83a76b0SSuyog Pawar /**
155*c83a76b0SSuyog Pawar ********************************************************************************
156*c83a76b0SSuyog Pawar *  @fn     void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms,
157*c83a76b0SSuyog Pawar *                                   layer_ctxt_t *ps_layer_ctxt)
158*c83a76b0SSuyog Pawar *
159*c83a76b0SSuyog Pawar *  @brief  Implements predictive search, with square grid refinement. In this
160*c83a76b0SSuyog Pawar *          case, we start with a bigger step size, like 4, refining upto a
161*c83a76b0SSuyog Pawar *          variable number of pts, till we hit end of search range or hit a
162*c83a76b0SSuyog Pawar *          minima. Then we refine using smaller steps. The bigger step size
163*c83a76b0SSuyog Pawar *          like 4 or 2, do not use optimized SAD functions, they evaluate
164*c83a76b0SSuyog Pawar *          SAD for each individual pt.
165*c83a76b0SSuyog Pawar *
166*c83a76b0SSuyog Pawar *  @param[in,out]  ps_search_prms: All the params to this function
167*c83a76b0SSuyog Pawar *
168*c83a76b0SSuyog Pawar *  @param[in] ps_layer_ctxt: Context for the layer
169*c83a76b0SSuyog Pawar *
170*c83a76b0SSuyog Pawar *  @return None
171*c83a76b0SSuyog Pawar ********************************************************************************
172*c83a76b0SSuyog Pawar */
hme_pred_search_square_stepn(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,ME_QUALITY_PRESETS_T e_me_quality_preset,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)173*c83a76b0SSuyog Pawar void hme_pred_search_square_stepn(
174*c83a76b0SSuyog Pawar     hme_search_prms_t *ps_search_prms,
175*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_layer_ctxt,
176*c83a76b0SSuyog Pawar     wgt_pred_ctxt_t *ps_wt_inp_prms,
177*c83a76b0SSuyog Pawar     ME_QUALITY_PRESETS_T e_me_quality_preset,
178*c83a76b0SSuyog Pawar     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
179*c83a76b0SSuyog Pawar 
180*c83a76b0SSuyog Pawar )
181*c83a76b0SSuyog Pawar {
182*c83a76b0SSuyog Pawar     /* Stores the SAD for all parts at each pt in the grid */
183*c83a76b0SSuyog Pawar     S32 ai4_sad_grid[9][TOT_NUM_PARTS];
184*c83a76b0SSuyog Pawar 
185*c83a76b0SSuyog Pawar     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
186*c83a76b0SSuyog Pawar 
187*c83a76b0SSuyog Pawar     /* Atributes of input candidates */
188*c83a76b0SSuyog Pawar     search_candt_t *ps_search_candts;
189*c83a76b0SSuyog Pawar     search_node_t s_search_node;
190*c83a76b0SSuyog Pawar 
191*c83a76b0SSuyog Pawar     /* Number of candidates to search */
192*c83a76b0SSuyog Pawar     S32 i4_num_candts, max_num_iters, i4_num_results;
193*c83a76b0SSuyog Pawar 
194*c83a76b0SSuyog Pawar     /* Input and reference attributes */
195*c83a76b0SSuyog Pawar     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
196*c83a76b0SSuyog Pawar 
197*c83a76b0SSuyog Pawar     /* The reference is actually an array of ptrs since there are several    */
198*c83a76b0SSuyog Pawar     /* reference id. So an array gets passed form calling function           */
199*c83a76b0SSuyog Pawar     U08 **ppu1_ref;
200*c83a76b0SSuyog Pawar 
201*c83a76b0SSuyog Pawar     /* Holds the search results at the end of this fxn */
202*c83a76b0SSuyog Pawar     search_results_t *ps_search_results;
203*c83a76b0SSuyog Pawar 
204*c83a76b0SSuyog Pawar     /* These control number of parts and number of pts in grid to search */
205*c83a76b0SSuyog Pawar     S32 i4_part_mask, i4_grid_mask;
206*c83a76b0SSuyog Pawar 
207*c83a76b0SSuyog Pawar     /* Blk width, blk height and blk size are derived from input params */
208*c83a76b0SSuyog Pawar     BLK_SIZE_T e_blk_size;
209*c83a76b0SSuyog Pawar     CU_SIZE_T e_cu_size;
210*c83a76b0SSuyog Pawar     S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter;
211*c83a76b0SSuyog Pawar     S32 i4_inp_off;
212*c83a76b0SSuyog Pawar     S32 i4_min_id;
213*c83a76b0SSuyog Pawar     /* Points to the range limits for mv */
214*c83a76b0SSuyog Pawar     range_prms_t *ps_range_prms;
215*c83a76b0SSuyog Pawar 
216*c83a76b0SSuyog Pawar     /*************************************************************************/
217*c83a76b0SSuyog Pawar     /* These functions pointers for calculating Err and the result update    */
218*c83a76b0SSuyog Pawar     /* Each carries its own parameters structure, which is generated on the  */
219*c83a76b0SSuyog Pawar     /* fly in this function                                                  */
220*c83a76b0SSuyog Pawar     /*************************************************************************/
221*c83a76b0SSuyog Pawar     err_prms_t s_err_prms;
222*c83a76b0SSuyog Pawar     result_upd_prms_t s_result_prms;
223*c83a76b0SSuyog Pawar 
224*c83a76b0SSuyog Pawar     max_num_iters = ps_search_prms->i4_max_iters;
225*c83a76b0SSuyog Pawar     /* Using the member 0 to store for all ref. idx., see in coarsest */
226*c83a76b0SSuyog Pawar     ps_range_prms = ps_search_prms->aps_mv_range[0];
227*c83a76b0SSuyog Pawar     i4_inp_stride = ps_search_prms->i4_inp_stride;
228*c83a76b0SSuyog Pawar     /* Move to the location of the search blk in inp buffer */
229*c83a76b0SSuyog Pawar     i4_inp_off = ps_search_prms->i4_cu_x_off;
230*c83a76b0SSuyog Pawar     i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride);
231*c83a76b0SSuyog Pawar 
232*c83a76b0SSuyog Pawar     ps_search_results = ps_search_prms->ps_search_results;
233*c83a76b0SSuyog Pawar 
234*c83a76b0SSuyog Pawar     /*************************************************************************/
235*c83a76b0SSuyog Pawar     /* Depending on flag i4_use_rec, we use either input of previously       */
236*c83a76b0SSuyog Pawar     /* encoded pictures or we use recon of previously encoded pictures.      */
237*c83a76b0SSuyog Pawar     /*************************************************************************/
238*c83a76b0SSuyog Pawar     if(ps_search_prms->i4_use_rec == 1)
239*c83a76b0SSuyog Pawar     {
240*c83a76b0SSuyog Pawar         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
241*c83a76b0SSuyog Pawar         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
242*c83a76b0SSuyog Pawar     }
243*c83a76b0SSuyog Pawar     else
244*c83a76b0SSuyog Pawar     {
245*c83a76b0SSuyog Pawar         i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
246*c83a76b0SSuyog Pawar         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
247*c83a76b0SSuyog Pawar     }
248*c83a76b0SSuyog Pawar     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
249*c83a76b0SSuyog Pawar 
250*c83a76b0SSuyog Pawar     /*************************************************************************/
251*c83a76b0SSuyog Pawar     /* Obtain the blk size of the search blk. Assumed here that the search   */
252*c83a76b0SSuyog Pawar     /* is done on a CU size, rather than any arbitrary blk size.             */
253*c83a76b0SSuyog Pawar     /*************************************************************************/
254*c83a76b0SSuyog Pawar     ps_search_results = ps_search_prms->ps_search_results;
255*c83a76b0SSuyog Pawar     e_blk_size = ps_search_prms->e_blk_size;
256*c83a76b0SSuyog Pawar     i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size];
257*c83a76b0SSuyog Pawar     i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size];
258*c83a76b0SSuyog Pawar     e_cu_size = ps_search_results->e_cu_size;
259*c83a76b0SSuyog Pawar     i4_num_results = (S32)ps_search_results->u1_num_results_per_part;
260*c83a76b0SSuyog Pawar 
261*c83a76b0SSuyog Pawar     ps_search_candts = ps_search_prms->ps_search_candts;
262*c83a76b0SSuyog Pawar     i4_num_candts = ps_search_prms->i4_num_init_candts;
263*c83a76b0SSuyog Pawar     i4_part_mask = ps_search_prms->i4_part_mask;
264*c83a76b0SSuyog Pawar 
265*c83a76b0SSuyog Pawar     /*************************************************************************/
266*c83a76b0SSuyog Pawar     /* This array stores the ids of the partitions whose                     */
267*c83a76b0SSuyog Pawar     /* SADs are updated. Since the partitions whose SADs are updated may not */
268*c83a76b0SSuyog Pawar     /* be in contiguous order, we supply another level of indirection.       */
269*c83a76b0SSuyog Pawar     /*************************************************************************/
270*c83a76b0SSuyog Pawar     hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
271*c83a76b0SSuyog Pawar 
272*c83a76b0SSuyog Pawar     /* Update the parameters used to pass to SAD */
273*c83a76b0SSuyog Pawar     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
274*c83a76b0SSuyog Pawar     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
275*c83a76b0SSuyog Pawar     /* varying params which are updated just before calling fxn  */
276*c83a76b0SSuyog Pawar     s_err_prms.i4_inp_stride = i4_inp_stride;
277*c83a76b0SSuyog Pawar     s_err_prms.i4_ref_stride = i4_ref_stride;
278*c83a76b0SSuyog Pawar     s_err_prms.i4_part_mask = i4_part_mask;
279*c83a76b0SSuyog Pawar     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
280*c83a76b0SSuyog Pawar     s_err_prms.i4_blk_wd = i4_blk_wd;
281*c83a76b0SSuyog Pawar     s_err_prms.i4_blk_ht = i4_blk_ht;
282*c83a76b0SSuyog Pawar     s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids;
283*c83a76b0SSuyog Pawar 
284*c83a76b0SSuyog Pawar     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
285*c83a76b0SSuyog Pawar     s_result_prms.ps_search_results = ps_search_results;
286*c83a76b0SSuyog Pawar     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
287*c83a76b0SSuyog Pawar     s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx;
288*c83a76b0SSuyog Pawar     s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask;
289*c83a76b0SSuyog Pawar     s_result_prms.ps_search_node_base = &s_search_node;
290*c83a76b0SSuyog Pawar     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
291*c83a76b0SSuyog Pawar 
292*c83a76b0SSuyog Pawar     /* Run through each of the candts in a loop */
293*c83a76b0SSuyog Pawar     for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++)
294*c83a76b0SSuyog Pawar     {
295*c83a76b0SSuyog Pawar         S32 i4_num_refine;
296*c83a76b0SSuyog Pawar 
297*c83a76b0SSuyog Pawar         i4_step = ps_search_prms->i4_start_step;
298*c83a76b0SSuyog Pawar 
299*c83a76b0SSuyog Pawar         s_search_node = *(ps_search_candts->ps_search_node);
300*c83a76b0SSuyog Pawar 
301*c83a76b0SSuyog Pawar         /* initialize minimum cost for this candidate. As we search around */
302*c83a76b0SSuyog Pawar         /* this candidate, this is used to check early exit, when in any   */
303*c83a76b0SSuyog Pawar         /* given iteration, the center pt of the grid is lowest value      */
304*c83a76b0SSuyog Pawar         s_result_prms.i4_min_cost = MAX_32BIT_VAL;
305*c83a76b0SSuyog Pawar 
306*c83a76b0SSuyog Pawar         /* If we need to do refinements, then we need to evaluate */
307*c83a76b0SSuyog Pawar         /* neighbouring pts. Before doing so, we have to do       */
308*c83a76b0SSuyog Pawar         /* basic range checks against max allowed mvs             */
309*c83a76b0SSuyog Pawar         i4_num_refine = ps_search_candts->u1_num_steps_refine;
310*c83a76b0SSuyog Pawar 
311*c83a76b0SSuyog Pawar         CLIP_MV_WITHIN_RANGE(
312*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0);
313*c83a76b0SSuyog Pawar 
314*c83a76b0SSuyog Pawar         /* The first time, we search all 8 pts around init candt plus the init candt */
315*c83a76b0SSuyog Pawar         i4_grid_mask = 0x1ff;
316*c83a76b0SSuyog Pawar         s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off;
317*c83a76b0SSuyog Pawar 
318*c83a76b0SSuyog Pawar         for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++)
319*c83a76b0SSuyog Pawar         {
320*c83a76b0SSuyog Pawar             i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
321*c83a76b0SSuyog Pawar 
322*c83a76b0SSuyog Pawar             s_err_prms.i4_grid_mask = i4_grid_mask;
323*c83a76b0SSuyog Pawar             s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
324*c83a76b0SSuyog Pawar             s_err_prms.pu1_ref +=
325*c83a76b0SSuyog Pawar                 (s_search_node.s_mv.i2_mvx +
326*c83a76b0SSuyog Pawar                  (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
327*c83a76b0SSuyog Pawar 
328*c83a76b0SSuyog Pawar             s_result_prms.i4_step = i4_step;
329*c83a76b0SSuyog Pawar             s_err_prms.i4_step = i4_step;
330*c83a76b0SSuyog Pawar             s_result_prms.i4_grid_mask = i4_grid_mask;
331*c83a76b0SSuyog Pawar 
332*c83a76b0SSuyog Pawar             /* For Top,TopLeft and Left cand., get only center point SAD    */
333*c83a76b0SSuyog Pawar             /* and do early exit                                            */
334*c83a76b0SSuyog Pawar             if(0 == i4_num_refine)
335*c83a76b0SSuyog Pawar             {
336*c83a76b0SSuyog Pawar                 s_err_prms.i4_grid_mask = 0x1;
337*c83a76b0SSuyog Pawar                 s_result_prms.i4_grid_mask = 0x1;
338*c83a76b0SSuyog Pawar 
339*c83a76b0SSuyog Pawar                 /* sad pt fun. populates sad to 0th location, whereas update */
340*c83a76b0SSuyog Pawar                 /* fun. takes it based on part. id                           */
341*c83a76b0SSuyog Pawar                 s_err_prms.pi4_sad_grid =
342*c83a76b0SSuyog Pawar                     s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]);
343*c83a76b0SSuyog Pawar 
344*c83a76b0SSuyog Pawar                 ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms);
345*c83a76b0SSuyog Pawar 
346*c83a76b0SSuyog Pawar                 s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid;
347*c83a76b0SSuyog Pawar 
348*c83a76b0SSuyog Pawar                 if(ME_XTREME_SPEED_25 == e_me_quality_preset)
349*c83a76b0SSuyog Pawar                     hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
350*c83a76b0SSuyog Pawar                 else
351*c83a76b0SSuyog Pawar                     hme_update_results_grid_pu_bestn(&s_result_prms);
352*c83a76b0SSuyog Pawar 
353*c83a76b0SSuyog Pawar                 i4_min_id = (S32)PT_C; /* Center Point         */
354*c83a76b0SSuyog Pawar                 i4_step = 0; /* No further refinment */
355*c83a76b0SSuyog Pawar                 s_result_prms.i4_step = i4_step;
356*c83a76b0SSuyog Pawar                 s_err_prms.i4_step = i4_step;
357*c83a76b0SSuyog Pawar             }
358*c83a76b0SSuyog Pawar             else
359*c83a76b0SSuyog Pawar             {
360*c83a76b0SSuyog Pawar                 if(ME_XTREME_SPEED_25 == e_me_quality_preset)
361*c83a76b0SSuyog Pawar                 {
362*c83a76b0SSuyog Pawar                     err_prms_t *ps_err_prms = &s_err_prms;
363*c83a76b0SSuyog Pawar                     ASSERT(ps_err_prms->i4_grid_mask != 1);
364*c83a76b0SSuyog Pawar                     ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
365*c83a76b0SSuyog Pawar 
366*c83a76b0SSuyog Pawar                     /*****************************************************************/
367*c83a76b0SSuyog Pawar                     /* In this case, there are no partial updates. The blk can be    */
368*c83a76b0SSuyog Pawar                     /* of any type and need not be a CU. The only thing that matters */
369*c83a76b0SSuyog Pawar                     /* here is the width of the blk, 4/8/(>=16)                      */
370*c83a76b0SSuyog Pawar                     /*****************************************************************/
371*c83a76b0SSuyog Pawar                     ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
372*c83a76b0SSuyog Pawar 
373*c83a76b0SSuyog Pawar                     hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
374*c83a76b0SSuyog Pawar                 }
375*c83a76b0SSuyog Pawar                 else
376*c83a76b0SSuyog Pawar                 {
377*c83a76b0SSuyog Pawar                     /* Obtain SAD for all 9 pts in grid*/
378*c83a76b0SSuyog Pawar                     hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
379*c83a76b0SSuyog Pawar                 }
380*c83a76b0SSuyog Pawar 
381*c83a76b0SSuyog Pawar                 /* Early exit in case of centre being local minima */
382*c83a76b0SSuyog Pawar                 i4_min_id = s_result_prms.i4_min_id;
383*c83a76b0SSuyog Pawar             }
384*c83a76b0SSuyog Pawar 
385*c83a76b0SSuyog Pawar             i4_grid_mask = gai4_opt_grid_mask[i4_min_id];
386*c83a76b0SSuyog Pawar 
387*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
388*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
389*c83a76b0SSuyog Pawar             if(i4_min_id == (S32)PT_C)
390*c83a76b0SSuyog Pawar                 break;
391*c83a76b0SSuyog Pawar         }
392*c83a76b0SSuyog Pawar 
393*c83a76b0SSuyog Pawar         /* Next keep reducing stepsize by factor of 2 */
394*c83a76b0SSuyog Pawar         i4_step >>= 1;
395*c83a76b0SSuyog Pawar         while(i4_step)
396*c83a76b0SSuyog Pawar         {
397*c83a76b0SSuyog Pawar             i4_grid_mask = 0x1fe &
398*c83a76b0SSuyog Pawar                            hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
399*c83a76b0SSuyog Pawar             //i4_grid_mask &= 0x1fe;
400*c83a76b0SSuyog Pawar 
401*c83a76b0SSuyog Pawar             s_err_prms.i4_grid_mask = i4_grid_mask;
402*c83a76b0SSuyog Pawar             s_result_prms.i4_grid_mask = i4_grid_mask;
403*c83a76b0SSuyog Pawar             s_err_prms.i4_step = i4_step;
404*c83a76b0SSuyog Pawar             s_result_prms.i4_step = i4_step;
405*c83a76b0SSuyog Pawar             s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
406*c83a76b0SSuyog Pawar             s_err_prms.pu1_ref +=
407*c83a76b0SSuyog Pawar                 (s_search_node.s_mv.i2_mvx +
408*c83a76b0SSuyog Pawar                  (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
409*c83a76b0SSuyog Pawar             if(ME_XTREME_SPEED_25 == e_me_quality_preset)
410*c83a76b0SSuyog Pawar             {
411*c83a76b0SSuyog Pawar                 err_prms_t *ps_err_prms = &s_err_prms;
412*c83a76b0SSuyog Pawar                 ASSERT(ps_err_prms->i4_grid_mask != 1);
413*c83a76b0SSuyog Pawar                 ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
414*c83a76b0SSuyog Pawar 
415*c83a76b0SSuyog Pawar                 /*****************************************************************/
416*c83a76b0SSuyog Pawar                 /* In this case, there are no partial updates. The blk can be    */
417*c83a76b0SSuyog Pawar                 /* of any type and need not be a CU. The only thing that matters */
418*c83a76b0SSuyog Pawar                 /* here is the width of the blk, 4/8/(>=16)                      */
419*c83a76b0SSuyog Pawar                 /*****************************************************************/
420*c83a76b0SSuyog Pawar                 ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
421*c83a76b0SSuyog Pawar 
422*c83a76b0SSuyog Pawar                 hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
423*c83a76b0SSuyog Pawar             }
424*c83a76b0SSuyog Pawar             else
425*c83a76b0SSuyog Pawar             {
426*c83a76b0SSuyog Pawar                 hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
427*c83a76b0SSuyog Pawar             }
428*c83a76b0SSuyog Pawar 
429*c83a76b0SSuyog Pawar             i4_min_id = s_result_prms.i4_min_id;
430*c83a76b0SSuyog Pawar 
431*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
432*c83a76b0SSuyog Pawar             s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
433*c83a76b0SSuyog Pawar 
434*c83a76b0SSuyog Pawar             i4_step >>= 1;
435*c83a76b0SSuyog Pawar         }
436*c83a76b0SSuyog Pawar 
437*c83a76b0SSuyog Pawar         ps_search_candts++;
438*c83a76b0SSuyog Pawar     }
439*c83a76b0SSuyog Pawar }
440*c83a76b0SSuyog Pawar 
441*c83a76b0SSuyog Pawar /**
442*c83a76b0SSuyog Pawar ********************************************************************************
443*c83a76b0SSuyog Pawar *  @fn     hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms,
444*c83a76b0SSuyog Pawar *                               layer_ctxt_t *ps_layer_ctxt)
445*c83a76b0SSuyog Pawar *
446*c83a76b0SSuyog Pawar *  @brief  Implements predictive search with square grid refinement. In this
447*c83a76b0SSuyog Pawar *           case, the square grid is of step 1 always. since this is considered
448*c83a76b0SSuyog Pawar *           to be more of a refinement search
449*c83a76b0SSuyog Pawar *
450*c83a76b0SSuyog Pawar *  @param[in,out]  ps_search_prms: All the params to this function
451*c83a76b0SSuyog Pawar *
452*c83a76b0SSuyog Pawar *  @param[in] ps_layer_ctxt: All info about this layer
453*c83a76b0SSuyog Pawar *
454*c83a76b0SSuyog Pawar *  @return None
455*c83a76b0SSuyog Pawar ********************************************************************************
456*c83a76b0SSuyog Pawar */
457*c83a76b0SSuyog Pawar /**
458*c83a76b0SSuyog Pawar ********************************************************************************
459*c83a76b0SSuyog Pawar *  @fn     hme_pred_search(hme_search_prms_t *ps_search_prms,
460*c83a76b0SSuyog Pawar *                               layer_ctxt_t *ps_layer_ctxt)
461*c83a76b0SSuyog Pawar *
462*c83a76b0SSuyog Pawar *  @brief  Implements predictive search after removing duplicate candidates
463*c83a76b0SSuyog Pawar *          from initial list. Each square grid (of step 1) is expanded
464*c83a76b0SSuyog Pawar *          to nine search pts before the deduplication process. one point
465*c83a76b0SSuyog Pawar *          cost is then evaluated for each unique node after the deduplication
466*c83a76b0SSuyog Pawar *          process
467*c83a76b0SSuyog Pawar *
468*c83a76b0SSuyog Pawar *  @param[in,out]  ps_search_prms: All the params to this function
469*c83a76b0SSuyog Pawar *
470*c83a76b0SSuyog Pawar *  @param[in] ps_layer_ctxt: All info about this layer
471*c83a76b0SSuyog Pawar *
472*c83a76b0SSuyog Pawar *  @return None
473*c83a76b0SSuyog Pawar ********************************************************************************
474*c83a76b0SSuyog Pawar */
hme_pred_search(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,S08 i1_grid_flag,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)475*c83a76b0SSuyog Pawar void hme_pred_search(
476*c83a76b0SSuyog Pawar     hme_search_prms_t *ps_search_prms,
477*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_layer_ctxt,
478*c83a76b0SSuyog Pawar     wgt_pred_ctxt_t *ps_wt_inp_prms,
479*c83a76b0SSuyog Pawar     S08 i1_grid_flag,
480*c83a76b0SSuyog Pawar     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
481*c83a76b0SSuyog Pawar 
482*c83a76b0SSuyog Pawar )
483*c83a76b0SSuyog Pawar {
484*c83a76b0SSuyog Pawar     /* Stores the SAD for all parts at each pt in the grid */
485*c83a76b0SSuyog Pawar     S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
486*c83a76b0SSuyog Pawar 
487*c83a76b0SSuyog Pawar     /* Atributes of input candidates */
488*c83a76b0SSuyog Pawar     search_node_t *ps_search_node;
489*c83a76b0SSuyog Pawar 
490*c83a76b0SSuyog Pawar     search_results_t *ps_search_results;
491*c83a76b0SSuyog Pawar     S32 i4_num_nodes, i4_candt;
492*c83a76b0SSuyog Pawar 
493*c83a76b0SSuyog Pawar     /* Input and reference attributes */
494*c83a76b0SSuyog Pawar     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
495*c83a76b0SSuyog Pawar 
496*c83a76b0SSuyog Pawar     /* The reference is actually an array of ptrs since there are several    */
497*c83a76b0SSuyog Pawar     /* reference id. So an array gets passed form calling function           */
498*c83a76b0SSuyog Pawar     U08 **ppu1_ref;
499*c83a76b0SSuyog Pawar 
500*c83a76b0SSuyog Pawar     /* These control number of parts and number of pts in grid to search */
501*c83a76b0SSuyog Pawar     S32 i4_part_mask, i4_grid_mask;
502*c83a76b0SSuyog Pawar 
503*c83a76b0SSuyog Pawar     S32 shift_for_cu_size;
504*c83a76b0SSuyog Pawar 
505*c83a76b0SSuyog Pawar     /* Blk width, blk height and blk size are derived from input params */
506*c83a76b0SSuyog Pawar     BLK_SIZE_T e_blk_size;
507*c83a76b0SSuyog Pawar     CU_SIZE_T e_cu_size;
508*c83a76b0SSuyog Pawar     S32 i4_blk_wd, i4_blk_ht;
509*c83a76b0SSuyog Pawar 
510*c83a76b0SSuyog Pawar     /*************************************************************************/
511*c83a76b0SSuyog Pawar     /* These functions pointers for calculating Err and the result update    */
512*c83a76b0SSuyog Pawar     /* Each carries its own parameters structure, which is generated on the  */
513*c83a76b0SSuyog Pawar     /* fly in this function                                                  */
514*c83a76b0SSuyog Pawar     /*************************************************************************/
515*c83a76b0SSuyog Pawar     PF_RESULT_FXN_T pf_hme_result_fxn;
516*c83a76b0SSuyog Pawar     PF_SAD_FXN_T pf_sad_fxn;
517*c83a76b0SSuyog Pawar     PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
518*c83a76b0SSuyog Pawar     err_prms_t s_err_prms;
519*c83a76b0SSuyog Pawar     result_upd_prms_t s_result_prms;
520*c83a76b0SSuyog Pawar     S32 i4_num_results;
521*c83a76b0SSuyog Pawar     S32 i4_inp_off;
522*c83a76b0SSuyog Pawar     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
523*c83a76b0SSuyog Pawar 
524*c83a76b0SSuyog Pawar     i4_inp_stride = ps_search_prms->i4_inp_stride;
525*c83a76b0SSuyog Pawar 
526*c83a76b0SSuyog Pawar     /* Move to the location of the search blk in inp buffer */
527*c83a76b0SSuyog Pawar     i4_inp_off = ps_search_prms->i4_cu_x_off;
528*c83a76b0SSuyog Pawar     i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
529*c83a76b0SSuyog Pawar 
530*c83a76b0SSuyog Pawar     /*************************************************************************/
531*c83a76b0SSuyog Pawar     /* Depending on flag i4_use_rec, we use either input of previously       */
532*c83a76b0SSuyog Pawar     /* encoded pictures or we use recon of previously encoded pictures.      */
533*c83a76b0SSuyog Pawar     /*************************************************************************/
534*c83a76b0SSuyog Pawar     if(ps_search_prms->i4_use_rec == 1)
535*c83a76b0SSuyog Pawar     {
536*c83a76b0SSuyog Pawar         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
537*c83a76b0SSuyog Pawar         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
538*c83a76b0SSuyog Pawar     }
539*c83a76b0SSuyog Pawar     else
540*c83a76b0SSuyog Pawar     {
541*c83a76b0SSuyog Pawar         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
542*c83a76b0SSuyog Pawar         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
543*c83a76b0SSuyog Pawar     }
544*c83a76b0SSuyog Pawar     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
545*c83a76b0SSuyog Pawar     /* Obtain the blk size of the search blk. Assumed here that the search   */
546*c83a76b0SSuyog Pawar     /* is done on a CU size, rather than any arbitrary blk size.             */
547*c83a76b0SSuyog Pawar     ps_search_results = ps_search_prms->ps_search_results;
548*c83a76b0SSuyog Pawar     e_blk_size = ps_search_prms->e_blk_size;
549*c83a76b0SSuyog Pawar     i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
550*c83a76b0SSuyog Pawar     i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
551*c83a76b0SSuyog Pawar     e_cu_size = ps_search_results->e_cu_size;
552*c83a76b0SSuyog Pawar 
553*c83a76b0SSuyog Pawar     /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
554*c83a76b0SSuyog Pawar     /* This will also set the shift w.r.t. the base cu size of 8x8 */
555*c83a76b0SSuyog Pawar     shift_for_cu_size = e_cu_size;
556*c83a76b0SSuyog Pawar 
557*c83a76b0SSuyog Pawar     ps_search_node = ps_search_prms->ps_search_nodes;
558*c83a76b0SSuyog Pawar     i4_num_nodes = ps_search_prms->i4_num_search_nodes;
559*c83a76b0SSuyog Pawar     i4_part_mask = ps_search_prms->i4_part_mask;
560*c83a76b0SSuyog Pawar 
561*c83a76b0SSuyog Pawar     /* Update the parameters used to pass to SAD */
562*c83a76b0SSuyog Pawar     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
563*c83a76b0SSuyog Pawar     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
564*c83a76b0SSuyog Pawar     /* varying params which are updated just before calling fxn  */
565*c83a76b0SSuyog Pawar     s_err_prms.i4_inp_stride = i4_inp_stride;
566*c83a76b0SSuyog Pawar     s_err_prms.i4_ref_stride = i4_ref_stride;
567*c83a76b0SSuyog Pawar     s_err_prms.i4_part_mask = i4_part_mask;
568*c83a76b0SSuyog Pawar     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
569*c83a76b0SSuyog Pawar     s_err_prms.i4_blk_wd = i4_blk_wd;
570*c83a76b0SSuyog Pawar     s_err_prms.i4_blk_ht = i4_blk_ht;
571*c83a76b0SSuyog Pawar     s_err_prms.i4_step = 1;
572*c83a76b0SSuyog Pawar     s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
573*c83a76b0SSuyog Pawar 
574*c83a76b0SSuyog Pawar     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
575*c83a76b0SSuyog Pawar     s_result_prms.ps_search_results = ps_search_results;
576*c83a76b0SSuyog Pawar     s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
577*c83a76b0SSuyog Pawar     s_result_prms.pi4_sad_grid = ai4_sad_grid;
578*c83a76b0SSuyog Pawar     s_result_prms.i4_part_mask = i4_part_mask;
579*c83a76b0SSuyog Pawar     s_result_prms.i4_step = 1;
580*c83a76b0SSuyog Pawar     pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn(
581*c83a76b0SSuyog Pawar         i1_grid_flag,
582*c83a76b0SSuyog Pawar         ps_search_prms->u1_is_cu_noisy,
583*c83a76b0SSuyog Pawar         i4_part_mask,
584*c83a76b0SSuyog Pawar         ps_fullpel_refine_ctxt->i4_num_valid_parts,
585*c83a76b0SSuyog Pawar         ps_search_results->u1_num_results_per_part);
586*c83a76b0SSuyog Pawar 
587*c83a76b0SSuyog Pawar     pf_calc_sad_and_result(
588*c83a76b0SSuyog Pawar         ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
589*c83a76b0SSuyog Pawar }
590*c83a76b0SSuyog Pawar 
hme_get_calc_sad_and_result_explicit_fxn(ihevce_me_optimised_function_list_t * ps_me_optimised_function_list,S32 i4_part_mask,S32 i4_num_partitions,S08 i1_grid_enable,U08 u1_num_results_per_part)591*c83a76b0SSuyog Pawar static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn(
592*c83a76b0SSuyog Pawar     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list,
593*c83a76b0SSuyog Pawar     S32 i4_part_mask,
594*c83a76b0SSuyog Pawar     S32 i4_num_partitions,
595*c83a76b0SSuyog Pawar     S08 i1_grid_enable,
596*c83a76b0SSuyog Pawar     U08 u1_num_results_per_part)
597*c83a76b0SSuyog Pawar {
598*c83a76b0SSuyog Pawar     FT_CALC_SAD_AND_RESULT *pf_func = NULL;
599*c83a76b0SSuyog Pawar 
600*c83a76b0SSuyog Pawar     if(2 == u1_num_results_per_part)
601*c83a76b0SSuyog Pawar     {
602*c83a76b0SSuyog Pawar         if(i4_part_mask == 1)
603*c83a76b0SSuyog Pawar         {
604*c83a76b0SSuyog Pawar             ASSERT(i4_num_partitions == 1);
605*c83a76b0SSuyog Pawar 
606*c83a76b0SSuyog Pawar             if(i1_grid_enable == 0)
607*c83a76b0SSuyog Pawar             {
608*c83a76b0SSuyog Pawar                 pf_func =
609*c83a76b0SSuyog Pawar                     ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8;
610*c83a76b0SSuyog Pawar             }
611*c83a76b0SSuyog Pawar             else
612*c83a76b0SSuyog Pawar             {
613*c83a76b0SSuyog Pawar                 pf_func = ps_me_optimised_function_list
614*c83a76b0SSuyog Pawar                               ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
615*c83a76b0SSuyog Pawar             }
616*c83a76b0SSuyog Pawar         }
617*c83a76b0SSuyog Pawar         else
618*c83a76b0SSuyog Pawar         {
619*c83a76b0SSuyog Pawar             ASSERT(i4_num_partitions == 5);
620*c83a76b0SSuyog Pawar 
621*c83a76b0SSuyog Pawar             pf_func =
622*c83a76b0SSuyog Pawar                 ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
623*c83a76b0SSuyog Pawar         }
624*c83a76b0SSuyog Pawar     }
625*c83a76b0SSuyog Pawar     else if(1 == u1_num_results_per_part)
626*c83a76b0SSuyog Pawar     {
627*c83a76b0SSuyog Pawar         if(i4_part_mask == 1)
628*c83a76b0SSuyog Pawar         {
629*c83a76b0SSuyog Pawar             ASSERT(i4_num_partitions == 1);
630*c83a76b0SSuyog Pawar 
631*c83a76b0SSuyog Pawar             if(i1_grid_enable == 0)
632*c83a76b0SSuyog Pawar             {
633*c83a76b0SSuyog Pawar                 pf_func =
634*c83a76b0SSuyog Pawar                     ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8;
635*c83a76b0SSuyog Pawar             }
636*c83a76b0SSuyog Pawar             else
637*c83a76b0SSuyog Pawar             {
638*c83a76b0SSuyog Pawar                 pf_func = ps_me_optimised_function_list
639*c83a76b0SSuyog Pawar                               ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
640*c83a76b0SSuyog Pawar             }
641*c83a76b0SSuyog Pawar         }
642*c83a76b0SSuyog Pawar         else
643*c83a76b0SSuyog Pawar         {
644*c83a76b0SSuyog Pawar             ASSERT(i4_num_partitions == 5);
645*c83a76b0SSuyog Pawar 
646*c83a76b0SSuyog Pawar             pf_func =
647*c83a76b0SSuyog Pawar                 ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
648*c83a76b0SSuyog Pawar         }
649*c83a76b0SSuyog Pawar     }
650*c83a76b0SSuyog Pawar 
651*c83a76b0SSuyog Pawar     return pf_func;
652*c83a76b0SSuyog Pawar }
653*c83a76b0SSuyog Pawar 
654*c83a76b0SSuyog Pawar /**
655*c83a76b0SSuyog Pawar ********************************************************************************
656*c83a76b0SSuyog Pawar *  @fn     void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
657*c83a76b0SSuyog Pawar *                                         layer_ctxt_t *ps_layer_ctxt,
658*c83a76b0SSuyog Pawar *                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
659*c83a76b0SSuyog Pawar *                                         S32 *pi4_valid_part_ids,
660*c83a76b0SSuyog Pawar *                                         S32 disable_refine,
661*c83a76b0SSuyog Pawar *                                         ME_QUALITY_PRESETS_T e_me_quality_preset)
662*c83a76b0SSuyog Pawar *
663*c83a76b0SSuyog Pawar *  @brief  Implements predictive search after removing duplicate candidates
664*c83a76b0SSuyog Pawar *          from initial list. Each square grid (of step 1) is expanded
665*c83a76b0SSuyog Pawar *          to nine search pts before the dedeuplication process. one point
666*c83a76b0SSuyog Pawar *          cost is then evaluated for each unique node after the deduplication
667*c83a76b0SSuyog Pawar *          process
668*c83a76b0SSuyog Pawar *
669*c83a76b0SSuyog Pawar *  @param[in,out]  ps_search_prms: All the params to this function
670*c83a76b0SSuyog Pawar *
671*c83a76b0SSuyog Pawar *  @param[in] ps_layer_ctxt: All info about this layer
672*c83a76b0SSuyog Pawar *
673*c83a76b0SSuyog Pawar *  @return None
674*c83a76b0SSuyog Pawar ********************************************************************************
675*c83a76b0SSuyog Pawar */
hme_pred_search_no_encode(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 * pi4_valid_part_ids,S32 disable_refine,ME_QUALITY_PRESETS_T e_me_quality_preset,S08 i1_grid_enable,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)676*c83a76b0SSuyog Pawar void hme_pred_search_no_encode(
677*c83a76b0SSuyog Pawar     hme_search_prms_t *ps_search_prms,
678*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_layer_ctxt,
679*c83a76b0SSuyog Pawar     wgt_pred_ctxt_t *ps_wt_inp_prms,
680*c83a76b0SSuyog Pawar     S32 *pi4_valid_part_ids,
681*c83a76b0SSuyog Pawar     S32 disable_refine,
682*c83a76b0SSuyog Pawar     ME_QUALITY_PRESETS_T e_me_quality_preset,
683*c83a76b0SSuyog Pawar     S08 i1_grid_enable,
684*c83a76b0SSuyog Pawar     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
685*c83a76b0SSuyog Pawar {
686*c83a76b0SSuyog Pawar     /* Stores the SAD for all parts at each pt in the grid */
687*c83a76b0SSuyog Pawar     S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
688*c83a76b0SSuyog Pawar 
689*c83a76b0SSuyog Pawar     /* Atributes of input candidates */
690*c83a76b0SSuyog Pawar     search_node_t *ps_search_node;
691*c83a76b0SSuyog Pawar     search_results_t *ps_search_results;
692*c83a76b0SSuyog Pawar     S32 i4_num_nodes;
693*c83a76b0SSuyog Pawar 
694*c83a76b0SSuyog Pawar     /* Input and reference attributes */
695*c83a76b0SSuyog Pawar     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
696*c83a76b0SSuyog Pawar 
697*c83a76b0SSuyog Pawar     /* The reference is actually an array of ptrs since there are several    */
698*c83a76b0SSuyog Pawar     /* reference id. So an array gets passed form calling function           */
699*c83a76b0SSuyog Pawar     U08 **ppu1_ref;
700*c83a76b0SSuyog Pawar 
701*c83a76b0SSuyog Pawar     /* These control number of parts and number of pts in grid to search */
702*c83a76b0SSuyog Pawar     S32 i4_part_mask;  // i4_grid_mask;
703*c83a76b0SSuyog Pawar 
704*c83a76b0SSuyog Pawar     S32 shift_for_cu_size;
705*c83a76b0SSuyog Pawar     /* Blk width, blk height and blk size are derived from input params */
706*c83a76b0SSuyog Pawar     BLK_SIZE_T e_blk_size;
707*c83a76b0SSuyog Pawar     CU_SIZE_T e_cu_size;
708*c83a76b0SSuyog Pawar     S32 i4_blk_wd, i4_blk_ht;
709*c83a76b0SSuyog Pawar 
710*c83a76b0SSuyog Pawar     /*************************************************************************/
711*c83a76b0SSuyog Pawar     /* These functions pointers for calculating Err and the result update    */
712*c83a76b0SSuyog Pawar     /* Each carries its own parameters structure, which is generated on the  */
713*c83a76b0SSuyog Pawar     /* fly in this function                                                  */
714*c83a76b0SSuyog Pawar     /*************************************************************************/
715*c83a76b0SSuyog Pawar     PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
716*c83a76b0SSuyog Pawar     err_prms_t s_err_prms;
717*c83a76b0SSuyog Pawar     result_upd_prms_t s_result_prms;
718*c83a76b0SSuyog Pawar     S32 i4_num_results;
719*c83a76b0SSuyog Pawar     S32 i4_search_idx = ps_search_prms->i1_ref_idx;
720*c83a76b0SSuyog Pawar     S32 i4_inp_off;
721*c83a76b0SSuyog Pawar     S32 i4_num_partitions;
722*c83a76b0SSuyog Pawar 
723*c83a76b0SSuyog Pawar     i4_inp_stride = ps_search_prms->i4_inp_stride;
724*c83a76b0SSuyog Pawar 
725*c83a76b0SSuyog Pawar     /* Move to the location of the search blk in inp buffer */
726*c83a76b0SSuyog Pawar     i4_inp_off = ps_search_prms->i4_cu_x_off;
727*c83a76b0SSuyog Pawar     i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
728*c83a76b0SSuyog Pawar 
729*c83a76b0SSuyog Pawar     /*************************************************************************/
730*c83a76b0SSuyog Pawar     /* Depending on flag i4_use_rec, we use either input of previously       */
731*c83a76b0SSuyog Pawar     /* encoded pictures or we use recon of previously encoded pictures.      */
732*c83a76b0SSuyog Pawar     /*************************************************************************/
733*c83a76b0SSuyog Pawar     if(ps_search_prms->i4_use_rec == 1)
734*c83a76b0SSuyog Pawar     {
735*c83a76b0SSuyog Pawar         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
736*c83a76b0SSuyog Pawar         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
737*c83a76b0SSuyog Pawar     }
738*c83a76b0SSuyog Pawar     else
739*c83a76b0SSuyog Pawar     {
740*c83a76b0SSuyog Pawar         i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
741*c83a76b0SSuyog Pawar         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
742*c83a76b0SSuyog Pawar     }
743*c83a76b0SSuyog Pawar     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
744*c83a76b0SSuyog Pawar     /* Obtain the blk size of the search blk. Assumed here that the search   */
745*c83a76b0SSuyog Pawar     /* is done on a CU size, rather than any arbitrary blk size.             */
746*c83a76b0SSuyog Pawar     ps_search_results = ps_search_prms->ps_search_results;
747*c83a76b0SSuyog Pawar     e_blk_size = ps_search_prms->e_blk_size;
748*c83a76b0SSuyog Pawar     i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
749*c83a76b0SSuyog Pawar     i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
750*c83a76b0SSuyog Pawar     e_cu_size = ps_search_results->e_cu_size;
751*c83a76b0SSuyog Pawar 
752*c83a76b0SSuyog Pawar     /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
753*c83a76b0SSuyog Pawar     /* This will also set the shift w.r.t. the base cu size of 8x8 */
754*c83a76b0SSuyog Pawar     shift_for_cu_size = e_cu_size;
755*c83a76b0SSuyog Pawar 
756*c83a76b0SSuyog Pawar     ps_search_node = ps_search_prms->ps_search_nodes;
757*c83a76b0SSuyog Pawar     i4_num_nodes = ps_search_prms->i4_num_search_nodes;
758*c83a76b0SSuyog Pawar     i4_part_mask = ps_search_prms->i4_part_mask;
759*c83a76b0SSuyog Pawar 
760*c83a76b0SSuyog Pawar     /*************************************************************************/
761*c83a76b0SSuyog Pawar     /* This array stores the ids of the partitions whose                     */
762*c83a76b0SSuyog Pawar     /* SADs are updated. Since the partitions whose SADs are updated may not */
763*c83a76b0SSuyog Pawar     /* be in contiguous order, we supply another level of indirection.       */
764*c83a76b0SSuyog Pawar     /*************************************************************************/
765*c83a76b0SSuyog Pawar     i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids);
766*c83a76b0SSuyog Pawar 
767*c83a76b0SSuyog Pawar     /* Update the parameters used to pass to SAD */
768*c83a76b0SSuyog Pawar     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
769*c83a76b0SSuyog Pawar     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
770*c83a76b0SSuyog Pawar     /* varying params which are updated just before calling fxn  */
771*c83a76b0SSuyog Pawar     s_err_prms.i4_inp_stride = i4_inp_stride;
772*c83a76b0SSuyog Pawar     s_err_prms.i4_ref_stride = i4_ref_stride;
773*c83a76b0SSuyog Pawar     s_err_prms.i4_part_mask = i4_part_mask;
774*c83a76b0SSuyog Pawar     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
775*c83a76b0SSuyog Pawar     s_err_prms.i4_blk_wd = i4_blk_wd;
776*c83a76b0SSuyog Pawar     s_err_prms.i4_blk_ht = i4_blk_ht;
777*c83a76b0SSuyog Pawar     s_err_prms.i4_step = 1;
778*c83a76b0SSuyog Pawar     s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
779*c83a76b0SSuyog Pawar     s_err_prms.i4_num_partitions = i4_num_partitions;
780*c83a76b0SSuyog Pawar 
781*c83a76b0SSuyog Pawar     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
782*c83a76b0SSuyog Pawar     s_result_prms.ps_search_results = ps_search_results;
783*c83a76b0SSuyog Pawar     s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
784*c83a76b0SSuyog Pawar     s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
785*c83a76b0SSuyog Pawar     s_result_prms.pi4_sad_grid = ai4_sad_grid;
786*c83a76b0SSuyog Pawar     s_result_prms.i4_part_mask = i4_part_mask;
787*c83a76b0SSuyog Pawar     s_result_prms.i4_step = 1;
788*c83a76b0SSuyog Pawar 
789*c83a76b0SSuyog Pawar     pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn(
790*c83a76b0SSuyog Pawar         ps_me_optimised_function_list,
791*c83a76b0SSuyog Pawar         i4_part_mask,
792*c83a76b0SSuyog Pawar         i4_num_partitions,
793*c83a76b0SSuyog Pawar         i1_grid_enable,
794*c83a76b0SSuyog Pawar         ps_search_results->u1_num_results_per_part);
795*c83a76b0SSuyog Pawar 
796*c83a76b0SSuyog Pawar     pf_calc_sad_and_result(
797*c83a76b0SSuyog Pawar         ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
798*c83a76b0SSuyog Pawar }
799