1*c83a76b0SSuyog Pawar /****************************************************************************** 2*c83a76b0SSuyog Pawar * 3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project 4*c83a76b0SSuyog Pawar * 5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License"); 6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License. 7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at: 8*c83a76b0SSuyog Pawar * 9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0 10*c83a76b0SSuyog Pawar * 11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software 12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS, 13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and 15*c83a76b0SSuyog Pawar * limitations under the License. 16*c83a76b0SSuyog Pawar * 17*c83a76b0SSuyog Pawar ***************************************************************************** 18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*c83a76b0SSuyog Pawar */ 20*c83a76b0SSuyog Pawar /*! 21*c83a76b0SSuyog Pawar ****************************************************************************** 22*c83a76b0SSuyog Pawar * \file hme_err_compute.h 23*c83a76b0SSuyog Pawar * 24*c83a76b0SSuyog Pawar * \brief 25*c83a76b0SSuyog Pawar * contains prototypes for functions that compute error or best results or 26*c83a76b0SSuyog Pawar * return fxn ptrs for the same. 27*c83a76b0SSuyog Pawar * 28*c83a76b0SSuyog Pawar * \date 29*c83a76b0SSuyog Pawar * 18/09/2012 30*c83a76b0SSuyog Pawar * 31*c83a76b0SSuyog Pawar * \author 32*c83a76b0SSuyog Pawar * Ittiam 33*c83a76b0SSuyog Pawar * 34*c83a76b0SSuyog Pawar ****************************************************************************** 35*c83a76b0SSuyog Pawar */ 36*c83a76b0SSuyog Pawar 37*c83a76b0SSuyog Pawar #ifndef _HME_ERR_COMPUTE_H_ 38*c83a76b0SSuyog Pawar #define _HME_ERR_COMPUTE_H_ 39*c83a76b0SSuyog Pawar 40*c83a76b0SSuyog Pawar /*****************************************************************************/ 41*c83a76b0SSuyog Pawar /* Constant Macros */ 42*c83a76b0SSuyog Pawar /*****************************************************************************/ 43*c83a76b0SSuyog Pawar #define NUM_4X4 16 44*c83a76b0SSuyog Pawar #define NUM_4X4_IN_8x8 4 45*c83a76b0SSuyog Pawar #define NUM_4X4_IN_16x16 16 46*c83a76b0SSuyog Pawar #define NUM_8X8_IN_16x16 4 47*c83a76b0SSuyog Pawar #define NUM_8X8_IN_32x32 16 48*c83a76b0SSuyog Pawar #define NUM_8X8_IN_64x64 64 49*c83a76b0SSuyog Pawar #define NUM_16X16_IN_64x64 16 50*c83a76b0SSuyog Pawar #define NUM_ROWS_IN_4X4 4 51*c83a76b0SSuyog Pawar #define NUM_PIXELS_IN_ROW 4 52*c83a76b0SSuyog Pawar #define NUM_CANDIDATES_IN_GRID 9 53*c83a76b0SSuyog Pawar 54*c83a76b0SSuyog Pawar // 0 => best + good; 55*c83a76b0SSuyog Pawar // 1 => 1st and 2nd best; 56*c83a76b0SSuyog Pawar // good => worse or equal to second best 57*c83a76b0SSuyog Pawar #define BESTEST 0 58*c83a76b0SSuyog Pawar 59*c83a76b0SSuyog Pawar #define COST(a, b, c, d, e) (a) 60*c83a76b0SSuyog Pawar 61*c83a76b0SSuyog Pawar /*****************************************************************************/ 62*c83a76b0SSuyog Pawar /* Functions */ 63*c83a76b0SSuyog Pawar /*****************************************************************************/ 64*c83a76b0SSuyog Pawar void hme_evalsad_pt_npu_MxN_16bit(err_prms_t *ps_prms); 65*c83a76b0SSuyog Pawar 66*c83a76b0SSuyog Pawar #define compute_sad_16bit hme_evalsad_pt_npu_MxN_16bit 67*c83a76b0SSuyog Pawar 68*c83a76b0SSuyog Pawar /** 69*c83a76b0SSuyog Pawar ******************************************************************************** 70*c83a76b0SSuyog Pawar * @fn S32 hme_update_results_grid_pu_bestn(result_upd_prms_t *ps_result_prms); 71*c83a76b0SSuyog Pawar * 72*c83a76b0SSuyog Pawar * @brief Updates the best N results based on a grid SAD for enabled partitions 73*c83a76b0SSuyog Pawar * 74*c83a76b0SSuyog Pawar * @param[in,out] ps_result_prms : contains parametrs pertaining to the results 75*c83a76b0SSuyog Pawar * 76*c83a76b0SSuyog Pawar * @return None 77*c83a76b0SSuyog Pawar ******************************************************************************** 78*c83a76b0SSuyog Pawar */ 79*c83a76b0SSuyog Pawar void hme_update_results_grid_pu_bestn(result_upd_prms_t *ps_result_prms); 80*c83a76b0SSuyog Pawar 81*c83a76b0SSuyog Pawar void hme_update_results_grid_pu_bestn_xtreme_speed(result_upd_prms_t *ps_result_prms); 82*c83a76b0SSuyog Pawar 83*c83a76b0SSuyog Pawar /** 84*c83a76b0SSuyog Pawar ******************************************************************************** 85*c83a76b0SSuyog Pawar * @fn hme_update_results_grid_pu_bestn_no_encode(result_upd_prms_t *ps_result_prms) 86*c83a76b0SSuyog Pawar * 87*c83a76b0SSuyog Pawar * @brief Updates results for the case where 1 best result is to be updated 88*c83a76b0SSuyog Pawar * for a given pt, for several parts 89*c83a76b0SSuyog Pawar * Note : The function is replicated for CLIPing the cost to 16bit to make 90*c83a76b0SSuyog Pawar * bit match with SIMD version 91*c83a76b0SSuyog Pawar * 92*c83a76b0SSuyog Pawar * @param[in] result_upd_prms_t : Contains the input parameters to this fxn 93*c83a76b0SSuyog Pawar * 94*c83a76b0SSuyog Pawar * @return The result_upd_prms_t structure is updated for all the active 95*c83a76b0SSuyog Pawar * parts in case the current candt has results for any given part 96*c83a76b0SSuyog Pawar * that is the best result for that part 97*c83a76b0SSuyog Pawar ******************************************************************************** 98*c83a76b0SSuyog Pawar */ 99*c83a76b0SSuyog Pawar void hme_update_results_grid_pu_bestn_no_encode(result_upd_prms_t *ps_result_prms); 100*c83a76b0SSuyog Pawar 101*c83a76b0SSuyog Pawar /** 102*c83a76b0SSuyog Pawar ******************************************************************************** 103*c83a76b0SSuyog Pawar * @fn hme_get_result_fxn(i4_grid_mask, i4_part_mask, i4_num_results) 104*c83a76b0SSuyog Pawar * 105*c83a76b0SSuyog Pawar * @brief Implements predictive search with square grid refinement. In this 106*c83a76b0SSuyog Pawar * case, the square grid is of step 1 always. since this is considered 107*c83a76b0SSuyog Pawar * to be more of a refinement search 108*c83a76b0SSuyog Pawar * 109*c83a76b0SSuyog Pawar * @param[in] i4_grid_mask : Mask containing which of 9 grid pts active 110*c83a76b0SSuyog Pawar * 111*c83a76b0SSuyog Pawar * @param[in] i4_part_mask : Mask containing which of the 17 parts active 112*c83a76b0SSuyog Pawar * 113*c83a76b0SSuyog Pawar * @param[in] i4_num_results: Number of active results 114*c83a76b0SSuyog Pawar * 115*c83a76b0SSuyog Pawar * @return Pointer to the appropriate result update function 116*c83a76b0SSuyog Pawar * (type PF_RESULT_FXN_T) 117*c83a76b0SSuyog Pawar ******************************************************************************** 118*c83a76b0SSuyog Pawar */ 119*c83a76b0SSuyog Pawar PF_RESULT_FXN_T hme_get_result_fxn(S32 i4_grid_mask, S32 i4_part_mask, S32 i4_num_results); 120*c83a76b0SSuyog Pawar 121*c83a76b0SSuyog Pawar void compute_satd_16bit(err_prms_t *ps_prms); 122*c83a76b0SSuyog Pawar 123*c83a76b0SSuyog Pawar void compute_satd_8bit(err_prms_t *ps_prms); 124*c83a76b0SSuyog Pawar 125*c83a76b0SSuyog Pawar void compute_sad_16bit(err_prms_t *ps_prms); 126*c83a76b0SSuyog Pawar 127*c83a76b0SSuyog Pawar S32 compute_mv_cost(search_node_t *ps_search_node, pred_ctxt_t *ps_pred_ctxt, BLK_SIZE_T e_blk_size); 128*c83a76b0SSuyog Pawar 129*c83a76b0SSuyog Pawar void hme_init_pred_ctxt_no_encode( 130*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt, 131*c83a76b0SSuyog Pawar search_results_t *ps_search_results, 132*c83a76b0SSuyog Pawar search_node_t *ps_top_candts, 133*c83a76b0SSuyog Pawar search_node_t *ps_left_candts, 134*c83a76b0SSuyog Pawar search_node_t **pps_proj_coloc_candts, 135*c83a76b0SSuyog Pawar search_node_t *ps_coloc_candts, 136*c83a76b0SSuyog Pawar search_node_t *ps_zeromv_candt, 137*c83a76b0SSuyog Pawar S32 pred_lx, 138*c83a76b0SSuyog Pawar S32 lambda, 139*c83a76b0SSuyog Pawar S32 lambda_q_shift, 140*c83a76b0SSuyog Pawar U08 **ppu1_ref_bits_tlu, 141*c83a76b0SSuyog Pawar S16 *pi2_ref_scf); 142*c83a76b0SSuyog Pawar 143*c83a76b0SSuyog Pawar void hme_init_pred_ctxt_encode( 144*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt, 145*c83a76b0SSuyog Pawar search_results_t *ps_search_results, 146*c83a76b0SSuyog Pawar search_node_t *ps_coloc_candts, 147*c83a76b0SSuyog Pawar search_node_t *ps_zeromv_candt, 148*c83a76b0SSuyog Pawar mv_grid_t *ps_mv_grid, 149*c83a76b0SSuyog Pawar S32 pred_lx, 150*c83a76b0SSuyog Pawar S32 lambda, 151*c83a76b0SSuyog Pawar S32 lambda_q_shift, 152*c83a76b0SSuyog Pawar U08 **ppu1_ref_bits_tlu, 153*c83a76b0SSuyog Pawar S16 *pi2_ref_scf); 154*c83a76b0SSuyog Pawar 155*c83a76b0SSuyog Pawar /** 156*c83a76b0SSuyog Pawar ******************************************************************************** 157*c83a76b0SSuyog Pawar * @fn compute_mv_cost_coarse(search_node_t *ps_node, 158*c83a76b0SSuyog Pawar * pred_ctxt_t *ps_pred_ctxt, 159*c83a76b0SSuyog Pawar * PART_ID_T e_part_id) 160*c83a76b0SSuyog Pawar * 161*c83a76b0SSuyog Pawar * @brief MV cost for coarse explicit search in coarsest layer 162*c83a76b0SSuyog Pawar * 163*c83a76b0SSuyog Pawar * @param[in] ps_node: search node having mv and ref id for which to eval cost 164*c83a76b0SSuyog Pawar * 165*c83a76b0SSuyog Pawar * @param[in] ps_pred_ctxt : mv pred context 166*c83a76b0SSuyog Pawar * 167*c83a76b0SSuyog Pawar * @param[in] e_part_id : Partition id. 168*c83a76b0SSuyog Pawar * 169*c83a76b0SSuyog Pawar * @return Cost value 170*c83a76b0SSuyog Pawar 171*c83a76b0SSuyog Pawar ******************************************************************************** 172*c83a76b0SSuyog Pawar */ 173*c83a76b0SSuyog Pawar S32 compute_mv_cost_coarse( 174*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 175*c83a76b0SSuyog Pawar 176*c83a76b0SSuyog Pawar /** 177*c83a76b0SSuyog Pawar ******************************************************************************** 178*c83a76b0SSuyog Pawar * @fn compute_mv_cost_coarse(search_node_t *ps_node, 179*c83a76b0SSuyog Pawar * pred_ctxt_t *ps_pred_ctxt, 180*c83a76b0SSuyog Pawar * PART_ID_T e_part_id) 181*c83a76b0SSuyog Pawar * 182*c83a76b0SSuyog Pawar * @brief MV cost for coarse explicit search in coarsest layer 183*c83a76b0SSuyog Pawar * 184*c83a76b0SSuyog Pawar * @param[in] ps_node: search node having mv and ref id for which to eval cost 185*c83a76b0SSuyog Pawar * 186*c83a76b0SSuyog Pawar * @param[in] ps_pred_ctxt : mv pred context 187*c83a76b0SSuyog Pawar * 188*c83a76b0SSuyog Pawar * @param[in] e_part_id : Partition id. 189*c83a76b0SSuyog Pawar * 190*c83a76b0SSuyog Pawar * @return Cost value 191*c83a76b0SSuyog Pawar 192*c83a76b0SSuyog Pawar ******************************************************************************** 193*c83a76b0SSuyog Pawar */ 194*c83a76b0SSuyog Pawar S32 compute_mv_cost_coarse_high_speed( 195*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 196*c83a76b0SSuyog Pawar 197*c83a76b0SSuyog Pawar /** 198*c83a76b0SSuyog Pawar ******************************************************************************** 199*c83a76b0SSuyog Pawar * @fn compute_mv_cost_coarse(search_node_t *ps_node, 200*c83a76b0SSuyog Pawar * pred_ctxt_t *ps_pred_ctxt, 201*c83a76b0SSuyog Pawar * PART_ID_T e_part_id) 202*c83a76b0SSuyog Pawar * 203*c83a76b0SSuyog Pawar * @brief MV cost for coarse explicit search in coarsest layer 204*c83a76b0SSuyog Pawar * 205*c83a76b0SSuyog Pawar * @param[in] ps_node: search node having mv and ref id for which to eval cost 206*c83a76b0SSuyog Pawar * 207*c83a76b0SSuyog Pawar * @param[in] ps_pred_ctxt : mv pred context 208*c83a76b0SSuyog Pawar * 209*c83a76b0SSuyog Pawar * @param[in] e_part_id : Partition id. 210*c83a76b0SSuyog Pawar * 211*c83a76b0SSuyog Pawar * @return Cost value 212*c83a76b0SSuyog Pawar 213*c83a76b0SSuyog Pawar ******************************************************************************** 214*c83a76b0SSuyog Pawar */ 215*c83a76b0SSuyog Pawar S32 compute_mv_cost_refine( 216*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 217*c83a76b0SSuyog Pawar 218*c83a76b0SSuyog Pawar /** 219*c83a76b0SSuyog Pawar ******************************************************************************** 220*c83a76b0SSuyog Pawar * @fn compute_mv_cost_explicit(search_node_t *ps_node, 221*c83a76b0SSuyog Pawar * pred_ctxt_t *ps_pred_ctxt, 222*c83a76b0SSuyog Pawar * PART_ID_T e_part_id) 223*c83a76b0SSuyog Pawar * 224*c83a76b0SSuyog Pawar * @brief MV cost for explicit search in layers not encoded 225*c83a76b0SSuyog Pawar * 226*c83a76b0SSuyog Pawar * @param[in] ps_node: search node having mv and ref id for which to eval cost 227*c83a76b0SSuyog Pawar * 228*c83a76b0SSuyog Pawar * @param[in] ps_pred_ctxt : mv pred context 229*c83a76b0SSuyog Pawar * 230*c83a76b0SSuyog Pawar * @param[in] e_part_id : Partition id. 231*c83a76b0SSuyog Pawar * 232*c83a76b0SSuyog Pawar * @return Cost value 233*c83a76b0SSuyog Pawar 234*c83a76b0SSuyog Pawar ******************************************************************************** 235*c83a76b0SSuyog Pawar */ 236*c83a76b0SSuyog Pawar S32 compute_mv_cost_explicit( 237*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 238*c83a76b0SSuyog Pawar 239*c83a76b0SSuyog Pawar S32 compute_mv_cost_implicit( 240*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 241*c83a76b0SSuyog Pawar 242*c83a76b0SSuyog Pawar S32 compute_mv_cost_implicit_high_speed( 243*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 244*c83a76b0SSuyog Pawar 245*c83a76b0SSuyog Pawar S32 compute_mv_cost_implicit_high_speed_modified( 246*c83a76b0SSuyog Pawar search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel); 247*c83a76b0SSuyog Pawar 248*c83a76b0SSuyog Pawar void hme_evalsad_grid_pu_16x16(err_prms_t *ps_prms); 249*c83a76b0SSuyog Pawar 250*c83a76b0SSuyog Pawar void hme_evalsatd_pt_pu_8x8(err_prms_t *ps_prms); 251*c83a76b0SSuyog Pawar 252*c83a76b0SSuyog Pawar WORD32 hme_evalsatd_pt_pu_8x8_tu_rec( 253*c83a76b0SSuyog Pawar err_prms_t *ps_prms, 254*c83a76b0SSuyog Pawar WORD32 lambda, 255*c83a76b0SSuyog Pawar WORD32 lambda_q_shift, 256*c83a76b0SSuyog Pawar WORD32 i4_frm_qstep, 257*c83a76b0SSuyog Pawar me_func_selector_t *ps_func_selector); 258*c83a76b0SSuyog Pawar 259*c83a76b0SSuyog Pawar void hme_evalsatd_update_1_best_result_pt_pu_16x16( 260*c83a76b0SSuyog Pawar err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms); 261*c83a76b0SSuyog Pawar 262*c83a76b0SSuyog Pawar WORD32 hme_evalsatd_pt_pu_32x32_tu_rec( 263*c83a76b0SSuyog Pawar err_prms_t *ps_prms, 264*c83a76b0SSuyog Pawar WORD32 lambda, 265*c83a76b0SSuyog Pawar WORD32 lambda_q_shift, 266*c83a76b0SSuyog Pawar WORD32 i4_frm_qstep, 267*c83a76b0SSuyog Pawar me_func_selector_t *ps_func_selector); 268*c83a76b0SSuyog Pawar 269*c83a76b0SSuyog Pawar void hme_evalsatd_pt_pu_32x32(err_prms_t *ps_prms); 270*c83a76b0SSuyog Pawar 271*c83a76b0SSuyog Pawar void hme_evalsatd_pt_pu_64x64(err_prms_t *ps_prms); 272*c83a76b0SSuyog Pawar 273*c83a76b0SSuyog Pawar WORD32 hme_evalsatd_pt_pu_64x64_tu_rec( 274*c83a76b0SSuyog Pawar err_prms_t *ps_prms, 275*c83a76b0SSuyog Pawar WORD32 lambda, 276*c83a76b0SSuyog Pawar WORD32 lambda_q_shift, 277*c83a76b0SSuyog Pawar WORD32 i4_frm_qstep, 278*c83a76b0SSuyog Pawar me_func_selector_t *ps_func_selector); 279*c83a76b0SSuyog Pawar 280*c83a76b0SSuyog Pawar WORD32 hme_evalsatd_pt_pu_16x16_tu_rec( 281*c83a76b0SSuyog Pawar err_prms_t *ps_prms, 282*c83a76b0SSuyog Pawar WORD32 lambda, 283*c83a76b0SSuyog Pawar WORD32 lambda_q_shift, 284*c83a76b0SSuyog Pawar WORD32 i4_frm_qstep, 285*c83a76b0SSuyog Pawar me_func_selector_t *ps_func_selector); 286*c83a76b0SSuyog Pawar 287*c83a76b0SSuyog Pawar void ihevce_had_32x32_r( 288*c83a76b0SSuyog Pawar UWORD8 *pu1_src, 289*c83a76b0SSuyog Pawar WORD32 src_strd, 290*c83a76b0SSuyog Pawar UWORD8 *pu1_pred, 291*c83a76b0SSuyog Pawar WORD32 pred_strd, 292*c83a76b0SSuyog Pawar WORD16 *pi2_dst, 293*c83a76b0SSuyog Pawar WORD32 dst_strd, 294*c83a76b0SSuyog Pawar WORD32 **ppi4_hsad, 295*c83a76b0SSuyog Pawar WORD32 **ppi4_tu_split, 296*c83a76b0SSuyog Pawar WORD32 **ppi4_tu_early_cbf, 297*c83a76b0SSuyog Pawar WORD32 pos_x_y_4x4, 298*c83a76b0SSuyog Pawar WORD32 num_4x4_in_row, 299*c83a76b0SSuyog Pawar WORD32 lambda, 300*c83a76b0SSuyog Pawar WORD32 lambda_q_shift, 301*c83a76b0SSuyog Pawar WORD32 i4_frm_qstep, 302*c83a76b0SSuyog Pawar WORD32 i4_cur_depth, 303*c83a76b0SSuyog Pawar WORD32 i4_max_depth, 304*c83a76b0SSuyog Pawar WORD32 i4_max_tr_size, 305*c83a76b0SSuyog Pawar WORD32 *pi4_tu_split_cost, 306*c83a76b0SSuyog Pawar me_func_selector_t *ps_func_selector); 307*c83a76b0SSuyog Pawar 308*c83a76b0SSuyog Pawar void hme_update_results_pt_pu_best1_subpel_hs( 309*c83a76b0SSuyog Pawar err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms); 310*c83a76b0SSuyog Pawar 311*c83a76b0SSuyog Pawar void hme_set_mvp_node( 312*c83a76b0SSuyog Pawar search_results_t *ps_search_results, 313*c83a76b0SSuyog Pawar search_node_t *ps_candt_prj_coloc, 314*c83a76b0SSuyog Pawar U08 u1_pred_lx, 315*c83a76b0SSuyog Pawar U08 u1_default_ref_id); 316*c83a76b0SSuyog Pawar 317*c83a76b0SSuyog Pawar S32 hme_cmp_nodes(search_node_t *ps_best_node1, search_node_t *ps_best_node2); 318*c83a76b0SSuyog Pawar 319*c83a76b0SSuyog Pawar #endif /* #ifndef _HME_SEARCH_ALGO_H_*/ 320