xref: /aosp_15_r20/external/libavc/encoder/ih264e_me.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_me.c
25 *
26 * @brief
27 *  Contains definition of functions for motion estimation
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_init_mv_bits
34 *  - ih264e_get_search_candidates
35 *  - ih264e_find_pskip_params
36 *  - ih264e_find_pskip_params_me
37 *  - ih264e_get_mv_predictor
38 *  - ih264e_mv_pred
39 *  - ih264e_mv_pred_me
40 *  - ih264e_compute_me_single_reflist
41 *  - ih264e_compute_me_nmb
42 *  - ih264e_find_bskip_params_me
43 *  - ih264e_find_bskip_params
44 *  - ih264e_evaluate_bipred
45 *  - ih264e_compute_me_multi_reflist
46 *
47 * @remarks
48 *  none
49 *
50 *******************************************************************************
51 */
52 
53 /*****************************************************************************/
54 /* File Includes                                                             */
55 /*****************************************************************************/
56 
57 /* System Include Files */
58 #include <stdio.h>
59 #include <assert.h>
60 #include <limits.h>
61 
62 /* User Include Files */
63 #include "ih264_typedefs.h"
64 #include "iv2.h"
65 #include "ive2.h"
66 #include "ithread.h"
67 
68 #include "ih264_debug.h"
69 #include "ih264_macros.h"
70 #include "ih264_defs.h"
71 #include "ih264_mem_fns.h"
72 #include "ih264_padding.h"
73 #include "ih264_structs.h"
74 #include "ih264_trans_quant_itrans_iquant.h"
75 #include "ih264_inter_pred_filters.h"
76 #include "ih264_intra_pred_filters.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ih264_platform_macros.h"
80 
81 #include "ime_defs.h"
82 #include "ime_distortion_metrics.h"
83 #include "ime_structs.h"
84 #include "ime.h"
85 #include "ime_statistics.h"
86 
87 #include "irc_cntrl_param.h"
88 #include "irc_frame_info_collector.h"
89 
90 #include "ih264e_error.h"
91 #include "ih264e_defs.h"
92 #include "ih264e_globals.h"
93 #include "ih264e_rate_control.h"
94 #include "ih264e_bitstream.h"
95 #include "ih264e_cabac_structs.h"
96 #include "ih264e_structs.h"
97 #include "ih264e_mc.h"
98 #include "ih264e_me.h"
99 #include "ih264e_half_pel.h"
100 #include "ih264e_intra_modes_eval.h"
101 #include "ih264e_core_coding.h"
102 #include "ih264e_platform_macros.h"
103 
104 
105 /*****************************************************************************/
106 /* Function Definitions                                                      */
107 /*****************************************************************************/
108 
109 /**
110 *******************************************************************************
111 *
112 * @brief
113 *  This function populates the length of the codewords for motion vectors in the
114 *  range (-search range, search range) in pixels
115 *
116 * @param[in] ps_me
117 *  Pointer to me ctxt
118 *
119 * @param[out] pu1_mv_bits
120 *  length of the codeword for all mv's
121 *
122 * @remarks The length of the code words are derived from signed exponential
123 *  goloumb codes.
124 *
125 *******************************************************************************
126 */
ih264e_init_mv_bits(me_ctxt_t * ps_me_ctxt)127 void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
128 {
129     /* temp var */
130     WORD32 i, codesize = 3, diff, limit;
131     UWORD32 u4_code_num, u4_range;
132     UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
133 
134     /* max srch range */
135     diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
136     /* sub pel */
137     diff <<= 2;
138     /* delta mv */
139     diff <<= 1;
140 
141     /* codeNum for positive integer     =  2x-1     : Table9-3  */
142     u4_code_num = (diff << 1);
143 
144     /* get range of the bit string and put using put_bits()                 */
145     GETRANGE(u4_range, u4_code_num);
146 
147     limit = 2*u4_range - 1;
148 
149     /* init mv bits */
150     ps_me_ctxt->pu1_mv_bits[0] = 1;
151 
152     while (codesize < limit)
153     {
154         u4_uev_min = (1 << (codesize >> 1));
155         u4_uev_max = 2*u4_uev_min - 1;
156 
157         u4_sev_min = u4_uev_min >> 1;
158         u4_sev_max = u4_uev_max >> 1;
159 
160         DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
161 
162         for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
163         {
164             ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
165         }
166 
167         codesize += 2;
168     }
169 }
170 
171 /**
172 *******************************************************************************
173 *
174 * @brief Determines the valid candidates for which the initial search shall happen.
175 * The best of these candidates is used to center the diamond pixel search.
176 *
177 * @par Description The function sends the skip, (0,0), left, top and top-right
178 * neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
179 * these are the same MVs that are used to form the MV predictor. This initial MV
180 * search candidates need not take care of slice boundaries and hence neighbor
181 * availability checks are not made here.
182 *
183 * @param[in] ps_proc
184 *  Pointer to process context
185 *
186 * @param[in] ps_me_ctxt
187 *  pointer to me context
188 *
189 * @param[in] i4_ref_list
190 *  Current active reference list
191 *
192 * @returns  The list of MVs to be used of priming the full pel search and the
193 * number of such MVs
194 *
195 * @remarks
196 *   Assumptions : 1. Assumes Only partition of size 16x16
197 *
198 *******************************************************************************
199 */
ih264e_get_search_candidates(process_ctxt_t * ps_proc,me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)200 static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
201                                          me_ctxt_t *ps_me_ctxt,
202                                          WORD32 i4_reflist)
203 {
204     /* curr mb indices */
205     WORD32 i4_mb_x = ps_proc->i4_mb_x;
206 
207     /* Motion vector */
208     mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
209 
210     /* Pred modes */
211     WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
212 
213     /* mb part info */
214     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
215 
216     /* mvs */
217     WORD32 mvx, mvy;
218 
219     /* ngbr availability */
220     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
221 
222     /* Current mode */
223     WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
224 
225     /* srch range*/
226     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
227     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
228     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
229     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
230 
231     /* num of candidate search candidates */
232     UWORD32 u4_num_candidates = 0;
233 
234     ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
235     ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
236     ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
237     ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
238 
239     i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
240     i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
241     i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
242     i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
243 
244     /* Taking the Zero motion vector as one of the candidates   */
245     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
246     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
247 
248     u4_num_candidates++;
249 
250     /* Taking the Left MV Predictor as one of the candidates    */
251     if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
252     {
253         mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
254         mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
255 
256         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
257         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
258 
259         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
260         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
261 
262         u4_num_candidates ++;
263     }
264 
265     /* Taking the Top MV Predictor as one of the candidates     */
266     if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
267     {
268         mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
269         mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
270 
271         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
272         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
273 
274         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
275         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
276 
277         u4_num_candidates ++;
278 
279         /* Taking the TopRt MV Predictor as one of the candidates   */
280         if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
281         {
282             mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
283             mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
284 
285             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
286             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
287 
288             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
289             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
290 
291             u4_num_candidates ++;
292         }
293         /* Taking the TopLt MV Predictor as one of the candidates   */
294         else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
295         {
296             mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
297             mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
298 
299             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
300             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
301 
302             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
303             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
304 
305             u4_num_candidates ++;
306         }
307     }
308 
309     /********************************************************************/
310     /*                            MV Prediction                         */
311     /********************************************************************/
312     ih264e_mv_pred_me(ps_proc, i4_reflist);
313 
314     ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
315     ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
316 
317     /* Get the skip motion vector                               */
318     {
319         ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
320                                     [ps_proc->i4_slice_type](ps_proc, i4_reflist);
321 
322         /* Taking the Skip motion vector as one of the candidates   */
323         mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
324         mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
325 
326         mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
327         mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
328 
329         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
330         ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
331         u4_num_candidates++;
332 
333         if (ps_proc->i4_slice_type == BSLICE)
334         {
335             /* Taking the temporal Skip motion vector as one of the candidates   */
336             mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
337             mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
338 
339             mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
340             mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
341 
342             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
343             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
344             u4_num_candidates++;
345         }
346     }
347 
348     ASSERT(u4_num_candidates <= 6);
349 
350     ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
351 }
352 
353 /**
354 *******************************************************************************
355 *
356 * @brief The function computes parameters for a PSKIP MB
357 *
358 * @par Description:
359 *  The function updates the skip motion vector and checks if the current
360 *  MB can be a PSKIP MB or not
361 *
362 * @param[in] ps_proc
363 *  Pointer to process context
364 *
365 * @param[in] i4_ref_list
366 *  Current active reference list
367 *
368 * @returns Flag indicating if the current MB can be marked as skip
369 *
370 *******************************************************************************
371 */
ih264e_find_pskip_params(process_ctxt_t * ps_proc,WORD32 i4_reflist)372 WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
373 {
374     /* left mb motion vector */
375     enc_pu_t *ps_left_mb_pu ;
376 
377     /* top mb motion vector */
378     enc_pu_t *ps_top_mb_pu ;
379 
380     /* Skip mv */
381     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
382 
383     UNUSED(i4_reflist);
384 
385     ps_left_mb_pu = &ps_proc->s_left_mb_pu;
386     ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
387 
388     if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
389         (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
390         (
391           (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
392           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
393           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
394        ) ||
395        (
396           (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
397           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
398           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
399        )
400      )
401     {
402         ps_skip_mv->i2_mvx = 0;
403         ps_skip_mv->i2_mvy = 0;
404     }
405     else
406     {
407         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
408         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
409     }
410 
411     if ((ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
412      && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
413     {
414         return 1;
415     }
416 
417     return 0;
418 }
419 
420 /**
421 *******************************************************************************
422 *
423 * @brief The function computes parameters for a PSKIP MB
424 *
425 * @par Description:
426 *  The function updates the skip motion vector and checks if the current
427 *  MB can be a PSKIP MB or not
428 *
429 * @param[in] ps_proc
430 *  Pointer to process context
431 *
432 * @param[in] i4_ref_list
433 *  Current active reference list
434 *
435 * @returns Flag indicating if the current MB can be marked as skip
436 *
437 *******************************************************************************
438 */
ih264e_find_pskip_params_me(process_ctxt_t * ps_proc,WORD32 i4_reflist)439 WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
440 {
441     /* left mb motion vector */
442     enc_pu_t *ps_left_mb_pu ;
443 
444     /* top mb motion vector */
445     enc_pu_t *ps_top_mb_pu ;
446 
447     /* Skip mv */
448     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
449 
450     UNUSED(i4_reflist);
451 
452     ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
453     ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
454 
455     if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
456         (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
457         (
458           (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
459           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
460           (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
461         ) ||
462         (
463           (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
464           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
465           (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
466         )
467      )
468     {
469         ps_skip_mv->i2_mvx = 0;
470         ps_skip_mv->i2_mvy = 0;
471     }
472     else
473     {
474         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
475         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
476     }
477 
478     return PRED_L0;
479 }
480 
481 /**
482 *******************************************************************************
483 *
484 * @brief motion vector predictor
485 *
486 * @par Description:
487 *  The routine calculates the motion vector predictor for a given block,
488 *  given the candidate MV predictors.
489 *
490 * @param[in] ps_left_mb_pu
491 *  pointer to left mb motion vector info
492 *
493 * @param[in] ps_top_row_pu
494 *  pointer to top & top right mb motion vector info
495 *
496 * @param[out] ps_pred_mv
497 *  pointer to candidate predictors for the current block
498 *
499 * @param[in] i4_ref_list
500 *  Current active reference list
501 *
502 * @returns  The x & y components of the MV predictor.
503 *
504 * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
505 *   specification.
506 *   Assumptions : 1. Assumes Single reference frame
507 *                 2. Assumes Only partition of size 16x16
508 *
509 *******************************************************************************
510 */
ih264e_get_mv_predictor(enc_pu_t * ps_left_mb_pu,enc_pu_t * ps_top_row_pu,enc_pu_mv_t * ps_pred_mv,WORD32 i4_ref_list)511 void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
512                              enc_pu_t *ps_top_row_pu,
513                              enc_pu_mv_t *ps_pred_mv,
514                              WORD32 i4_ref_list)
515 {
516     /* Indicated the current ref */
517     WORD8 i1_ref_idx;
518 
519     /* For pred L0 */
520     i1_ref_idx = -1;
521     {
522         /* temp var */
523         WORD32 pred_algo = 3, a, b, c;
524 
525         /* If only one of the candidate blocks has a reference frame equal to
526          * the current block then use the same block as the final predictor */
527         a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
528         b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
529         c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
530 
531         if (a == 0 && b == -1 && c == -1)
532             pred_algo = 0; /* LEFT */
533         else if(a == -1 && b == 0 && c == -1)
534             pred_algo = 1; /* TOP */
535         else if(a == -1 && b == -1 && c == 0)
536             pred_algo = 2; /* TOP RIGHT */
537 
538         switch (pred_algo)
539         {
540             case 0:
541                 /* left */
542                 ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
543                 ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
544                 break;
545             case 1:
546                 /* top */
547                 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
548                 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
549                 break;
550             case 2:
551                 /* top right */
552                 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
553                 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
554                 break;
555             case 3:
556                 /* median */
557                 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
558                        ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
559                        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
560                        ps_pred_mv->s_mv.i2_mvx);
561                 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
562                        ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
563                        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
564                        ps_pred_mv->s_mv.i2_mvy);
565 
566                 break;
567             default:
568                 break;
569         }
570     }
571 }
572 
573 /**
574 *******************************************************************************
575 *
576 * @brief This function performs MV prediction
577 *
578 * @par Description:
579 *
580 * @param[in] ps_proc
581 *  Process context corresponding to the job
582 *
583 * @param[in] i4_slice_type
584 *  slice type
585 *
586 * @returns  none
587 *
588 * @remarks none
589 *  This function will update the MB availability since intra inter decision
590 *  should be done before the call
591 *
592 *******************************************************************************
593 */
ih264e_mv_pred(process_ctxt_t * ps_proc,WORD32 i4_slice_type)594 void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
595 {
596     /* left mb motion vector */
597     enc_pu_t *ps_left_mb_pu;
598 
599     /* top left mb motion vector */
600     enc_pu_t *ps_top_left_mb_pu;
601 
602     /* top row motion vector info */
603     enc_pu_t *ps_top_row_pu;
604 
605     /* predicted motion vector */
606     enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
607 
608     /* zero mv */
609     mv_t zero_mv = { 0, 0 };
610 
611     /*  mb neighbor availability */
612     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
613 
614     /* mb syntax elements of neighbors */
615     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
616     mb_info_t *ps_top_left_syn;
617     UWORD32 u4_left_is_intra;
618 
619     /* Temp var */
620     WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
621 
622     ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
623     u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
624     ps_left_mb_pu = &ps_proc->s_left_mb_pu;
625     ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
626     ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
627 
628     /* Number of ref lists to process */
629     max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
630 
631     for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
632     {
633         i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
634 
635         /* Before performing mv prediction prepare the ngbr information and
636          * reset motion vectors basing on their availability */
637         if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
638                         || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
639         {
640             /* left mv */
641             ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
642             ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
643         }
644         if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
645                         || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
646         {
647             /* top mv */
648             ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
649             ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
650         }
651 
652         if (!ps_ngbr_avbl->u1_mb_c)
653         {
654             /* top right mv - When top right partition is not available for
655              * prediction if top left is available use it for prediction else
656              * set the mv information to -1 and (0, 0)
657              * */
658             if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
659                             || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
660             {
661                 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
662                 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
663             }
664             else
665             {
666                 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
667                 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
668             }
669         }
670         else if(ps_top_syn[1].u2_is_intra
671                         || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
672         {
673             ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
674             ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
675         }
676 
677         ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
678     }
679 }
680 
681 /**
682 *******************************************************************************
683 *
684 * @brief This function approximates Pred. MV
685 *
686 * @par Description:
687 *
688 * @param[in] ps_proc
689 *  Process context corresponding to the job
690 *
691 * @param[in] i4_ref_list
692 *  Current active reference list
693 *
694 * @returns  none
695 *
696 * @remarks none
697 *  Motion estimation happens at nmb level. For cost calculations, mv is appro
698 *  ximated using this function
699 *
700 *******************************************************************************
701 */
ih264e_mv_pred_me(process_ctxt_t * ps_proc,WORD32 i4_ref_list)702 void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
703 {
704     /* left mb motion vector */
705     enc_pu_t *ps_left_mb_pu ;
706 
707     /* top left mb motion vector */
708     enc_pu_t *ps_top_left_mb_pu ;
709 
710     /* top row motion vector info */
711     enc_pu_t *ps_top_row_pu;
712 
713     enc_pu_t s_top_row_pu[2];
714 
715     /* predicted motion vector */
716     enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
717 
718     /* zero mv */
719     mv_t zero_mv = {0, 0};
720 
721     /* Complementary pred mode */
722     WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
723 
724     /*  mb neighbor availability */
725     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
726 
727     ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
728     ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
729     ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
730 
731     s_top_row_pu[0] = ps_top_row_pu[0];
732     s_top_row_pu[1] = ps_top_row_pu[1];
733 
734     /*
735      * Before performing mv prediction prepare the ngbr information and
736      * reset motion vectors basing on their availability
737      */
738     if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
739     {
740         /* left mv */
741         ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
742         ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
743     }
744     if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
745     {
746         /* top mv */
747         s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
748         s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
749 
750     }
751     if (!ps_ngbr_avbl->u1_mb_c)
752     {
753         /* top right mv - When top right partition is not available for
754          * prediction if top left is available use it for prediction else
755          * set the mv information to -1 and (0, 0)
756          * */
757         if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
758         {
759             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
760             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
761 
762             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
763             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
764         }
765         else
766         {
767             s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
768             s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
769         }
770     }
771     else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
772     {
773         ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
774         ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
775     }
776 
777     ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
778                             &ps_pred_mv[i4_ref_list], i4_ref_list);
779 }
780 
781 /**
782 *******************************************************************************
783 *
784 * @brief This function initializes me ctxt
785 *
786 * @par Description:
787 *  Before dispatching the current job to me thread, the me context associated
788 *  with the job is initialized.
789 *
790 * @param[in] ps_proc
791 *  Process context corresponding to the job
792 *
793 * @returns  none
794 *
795 * @remarks none
796 *
797 *******************************************************************************
798 */
ih264e_init_me(process_ctxt_t * ps_proc)799 void ih264e_init_me(process_ctxt_t *ps_proc)
800 {
801     /* me ctxt */
802     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
803 
804     /* codec context */
805     codec_t *ps_codec = ps_proc->ps_codec;
806 
807     ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
808 
809     if (ps_codec->s_cfg.u4_num_bframes == 0)
810     {
811        ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
812     }
813     else
814     {
815        ps_me_ctxt->i4_skip_bias[PSLICE] =  SKIP_BIAS_P;
816     }
817 
818     /* src ptr */
819     ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
820 
821     /* src stride */
822     ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
823 
824     /* ref ptrs and corresponding lagrange params */
825     ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
826     ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
827 
828     if (ps_codec->pic_type == PIC_B)
829     {
830         ps_me_ctxt->u4_lambda_motion = gu1_qp_lambdaB[ps_me_ctxt->u1_mb_qp];
831     }
832     else
833     {
834         ps_me_ctxt->u4_lambda_motion = gu1_qp_lambdaIP[ps_me_ctxt->u1_mb_qp];
835     }
836 }
837 
838 
839 /**
840 *******************************************************************************
841 *
842 * @brief This function performs motion estimation for the current mb using
843 *   single reference list
844 *
845 * @par Description:
846 *  The current mb is compared with a list of mb's in the reference frame for
847 *  least cost. The mb that offers least cost is chosen as predicted mb and the
848 *  displacement of the predicted mb from index location of the current mb is
849 *  signaled as mv. The list of the mb's that are chosen in the reference frame
850 *  are dependent on the speed of the ME configured.
851 *
852 * @param[in] ps_proc
853 *  Process context corresponding to the job
854 *
855 * @returns  motion vector of the pred mb, sad, cost.
856 *
857 * @remarks none
858 *
859 *******************************************************************************
860 */
ih264e_compute_me_single_reflist(process_ctxt_t * ps_proc)861 void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
862 {
863     /* me ctxt */
864     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
865 
866     /* codec context */
867     codec_t *ps_codec = ps_proc->ps_codec;
868 
869     /* recon stride */
870     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
871 
872     /* source buffer for halp pel generation functions */
873     UWORD8 *pu1_hpel_src;
874 
875     /* quantization parameters */
876     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
877 
878     /* Mb part ctxts for SKIP */
879     mb_part_ctxt s_skip_mbpart;
880 
881     /* Sad therholds */
882     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
883 
884     {
885         WORD32 rows_above, rows_below, columns_left, columns_right;
886 
887         /* During evaluation for motion vectors do not search through padded regions */
888         /* Obtain number of rows and columns that are effective for computing for me evaluation */
889         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
890         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
891         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
892         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
893 
894         /* init srch range */
895         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
896          * on all sides.
897          */
898         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
899         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
900         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
901         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
902 
903         /* this is to facilitate fast sub pel computation with minimal loads */
904         ps_me_ctxt->i4_srch_range_w += 1;
905         ps_me_ctxt->i4_srch_range_e -= 1;
906         ps_me_ctxt->i4_srch_range_n += 1;
907         ps_me_ctxt->i4_srch_range_s -= 1;
908     }
909 
910     /* Compute ME and store the MVs */
911 
912     /***********************************************************************
913      * Compute ME for list L0
914      ***********************************************************************/
915 
916     /* Init SATQD for the current list */
917     ps_me_ctxt->u4_min_sad_reached  = 0;
918     ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
919 
920     /* Get the seed motion vector candidates                    */
921     ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);
922 
923     /*****************************************************************
924      * Evaluate the SKIP for current list
925      *****************************************************************/
926     s_skip_mbpart.s_mv_curr.i2_mvx = 0;
927     s_skip_mbpart.s_mv_curr.i2_mvy = 0;
928     s_skip_mbpart.i4_mb_cost = INT_MAX;
929     s_skip_mbpart.i4_mb_distortion = INT_MAX;
930 
931     ime_compute_skip_cost( ps_me_ctxt,
932                            (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
933                            &s_skip_mbpart,
934                            ps_proc->ps_codec->s_cfg.u4_enable_satqd,
935                            PRED_L0,
936                            0 /* Not a Bslice */ );
937 
938     s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
939     s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
940 
941     /******************************************************************
942      * Evaluate ME For current list
943      *****************************************************************/
944     ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
945     ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
946     ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
947     ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;
948 
949     /* Init Hpel */
950     ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;
951 
952     /* In case we found out the minimum SAD, exit the ME eval */
953     if (!ps_me_ctxt->u4_min_sad_reached)
954     {
955         /* Evaluate search candidates for initial mv pt */
956         ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);
957 
958         /********************************************************************/
959         /*                  full pel motion estimation                      */
960         /********************************************************************/
961         ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
962 
963         /* Scale the MV to qpel resolution */
964         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
965         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;
966 
967         if (ps_me_ctxt->u4_enable_hpel)
968         {
969             /* moving src pointer to the converged motion vector location*/
970             pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
971                            + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
972                            + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2) * i4_rec_strd;
973 
974             ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
975             ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
976             ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
977 
978             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
979 
980             /* half  pel search is done for both sides of full pel,
981              * hence half_x of width x height = 17x16 is created
982              * starting from left half_x of converged full pel */
983             pu1_hpel_src -= 1;
984 
985             /* computing half_x */
986             ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
987                                                   ps_me_ctxt->apu1_subpel_buffs[0],
988                                                   i4_rec_strd,
989                                                   ps_me_ctxt->u4_subpel_buf_strd);
990 
991             /*
992              * Halfpel search is done for both sides of full pel,
993              * hence half_y of width x height = 16x17 is created
994              * starting from top half_y of converged full pel
995              * for half_xy top_left is required
996              * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
997              */
998             pu1_hpel_src -= i4_rec_strd;
999 
1000             /* computing half_y , and half_xy*/
1001             ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1002                             pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1003                             ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1004                             ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1005                             ps_me_ctxt->u4_subpel_buf_strd);
1006 
1007             ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
1008         }
1009     }
1010 
1011 
1012     /***********************************************************************
1013      * If a particular skiip Mv is giving better sad, copy to the corresponding
1014      * MBPART
1015      * In B slices this loop should go only to PREDL1: If we found min sad
1016      * we will go to the skip ref list only
1017      * Have to find a way to make it without too much change or new vars
1018      **********************************************************************/
1019     if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
1020     {
1021         ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1022         ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1023         ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1024     }
1025     else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
1026     {
1027         /* Now we have to copy the buffers */
1028         ps_codec->pf_inter_pred_luma_copy(
1029                         ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
1030                         ps_proc->pu1_best_subpel_buf,
1031                         ps_me_ctxt->u4_subpel_buf_strd,
1032                         ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
1033                         NULL, 0);
1034     }
1035 
1036     /**********************************************************************
1037      * Now get the minimum of MB part sads by searching over all ref lists
1038      **********************************************************************/
1039     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
1040     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
1041     ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
1042     ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
1043     ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1044     ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;
1045 
1046     /* Mark the reflists */
1047     ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
1048     ps_proc->ps_pu->s_me_info[1].i1_ref_idx =  0;
1049 
1050     /* number of partitions */
1051     ps_proc->u4_num_sub_partitions = 1;
1052     *(ps_proc->pu4_mb_pu_cnt) = 1;
1053 
1054     /* position in-terms of PU */
1055     ps_proc->ps_pu->b4_pos_x = 0;
1056     ps_proc->ps_pu->b4_pos_y = 0;
1057 
1058     /* PU size */
1059     ps_proc->ps_pu->b4_wd = 3;
1060     ps_proc->ps_pu->b4_ht = 3;
1061 
1062     /* Update min sad conditions */
1063     if (ps_me_ctxt->u4_min_sad_reached == 1)
1064     {
1065         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1066         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1067     }
1068 }
1069 
1070 /**
1071 *******************************************************************************
1072 *
1073 * @brief This function performs motion estimation for the current NMB
1074 *
1075 * @par Description:
1076 *  Intializes input and output pointers required by the function ih264e_compute_me
1077 *  and calls the function ih264e_compute_me in a loop to process NMBs.
1078 *
1079 * @param[in] ps_proc
1080 *  Process context corresponding to the job
1081 *
1082 * @param[in] u4_nmb_count
1083 *  Number of mb's to process
1084 *
1085 * @returns
1086 *
1087 * @remarks none
1088 *
1089 *******************************************************************************
1090 */
ih264e_compute_me_nmb(process_ctxt_t * ps_proc,UWORD32 u4_nmb_count)1091 void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1092 {
1093     /* pic pu */
1094     enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
1095 
1096     /* ME map */
1097     UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1098 
1099     /* temp var */
1100     UWORD32 u4_i;
1101 
1102     ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
1103     ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
1104 
1105     for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1106     {
1107         /* Wait for ME map */
1108         if (ps_proc->i4_mb_y > 0)
1109         {
1110             /* Wait for top right ME to be done */
1111             UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1112 
1113             while (1)
1114             {
1115                 volatile UWORD8 *pu1_buf;
1116                 WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1117 
1118                 idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1119                 pu1_buf =  pu1_me_map_tp_rw + idx;
1120                 if(*pu1_buf)
1121                     break;
1122                 ithread_yield();
1123             }
1124         }
1125 
1126         ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1127         ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1128         ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1129 
1130         ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1131 
1132         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1133         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1134 
1135         ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1136         ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1137 
1138         /* Set the best subpel buf to the correct mb so that the buffer can be copied */
1139         ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1140         ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1141 
1142         /* Set the min sad conditions */
1143         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
1144         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1145 
1146         /* Derive neighbor availability for the current macroblock */
1147         ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1148 
1149         /* init me */
1150         ih264e_init_me(ps_proc);
1151 
1152         /* Compute ME according to slice type */
1153         ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1154 
1155         /* update top and left structs */
1156         {
1157             mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1158             mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
1159             enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
1160             enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
1161             enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
1162 
1163             *ps_top_left_syn = *ps_top_syn;
1164 
1165             *ps_top_left_mb_pu = *ps_top_mv;
1166             *ps_left_mb_pu = *ps_proc->ps_pu;
1167         }
1168 
1169         ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1170 
1171         /* Copy the min sad reached info */
1172         ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1173         ps_proc->ps_nmb_info[u4_i].u4_min_sad   = ps_proc->ps_cur_mb->u4_min_sad;
1174 
1175         /*
1176          * To make sure that the MV map is properly sync to the
1177          * cache we need to do a DDB
1178          */
1179         {
1180             DATA_SYNC();
1181 
1182             pu1_me_map[ps_proc->i4_mb_x] = 1;
1183         }
1184         ps_proc->i4_mb_x++;
1185 
1186         ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1187         ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type  == PSKIP);
1188 
1189         /* update buffers pointers */
1190         ps_proc->pu1_src_buf_luma += MB_SIZE;
1191         ps_proc->pu1_rec_buf_luma += MB_SIZE;
1192         ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1193         ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1194 
1195         /*
1196          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1197          * the stride per MB is MB_SIZE
1198          */
1199         ps_proc->pu1_src_buf_chroma += MB_SIZE;
1200         ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1201         ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1202         ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1203 
1204 
1205         ps_proc->pu4_mb_pu_cnt += 1;
1206     }
1207 
1208     ps_proc->ps_pu = ps_pu_begin;
1209     ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1210 
1211     /* update buffers pointers */
1212     ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
1213     ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
1214     ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
1215     ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
1216 
1217     /*
1218      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1219      * the stride per MB is MB_SIZE
1220      */
1221     ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
1222     ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
1223     ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
1224     ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
1225 
1226     ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
1227 }
1228 
1229 
1230 /**
1231 *******************************************************************************
1232 *
1233 * @brief The function computes parameters for a BSKIP MB
1234 *
1235 * @par Description:
1236 *  The function updates the skip motion vector for B Mb, check if the Mb can be
1237 *  marked as skip and returns it
1238 *
1239 * @param[in] ps_proc
1240 *  Pointer to process context
1241 *
1242 * @param[in] i4_reflist
1243 *  Current active reference list
1244 *
1245 * @returns Flag indicating if the current Mb can be skip or not
1246 *
1247 * @remarks
1248 *   The code implements the logic as described in sec 8.4.1.2.2
1249 *   It also computes co-located MB parmas according to sec 8.4.1.2.1
1250 *
1251 *   Need to add condition for this fucntion to be used in ME
1252 *
1253 *******************************************************************************
1254 */
ih264e_find_bskip_params_me(process_ctxt_t * ps_proc,WORD32 i4_reflist)1255 WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1256 {
1257     /* Colzero for co-located MB */
1258     WORD32 i4_colzeroflag;
1259 
1260     /* motion vectors for neighbouring MBs */
1261     enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1262 
1263     /* Variables to check if a particular mB is available */
1264     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1265 
1266     /* Mode availability, init to no modes available     */
1267     WORD32 i4_mode_avail;
1268 
1269     /*  mb neighbor availability */
1270     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1271 
1272     /* Temp var */
1273     WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
1274 
1275     /*
1276      * Colocated motion vector
1277      */
1278     mv_t s_mvcol;
1279 
1280     /*
1281      * Colocated picture idx
1282      */
1283     WORD32 i4_refidxcol;
1284 
1285     UNUSED(i4_reflist);
1286 
1287     /**************************************************************************
1288      *Find co-located MB parameters
1289      *      See sec 8.4.1.2.1  for reference
1290      **************************************************************************/
1291     {
1292         /*
1293          * Find the co-located Mb and update the skip and pred appropriately
1294          * 1) Default colpic is forward ref : Table 8-6
1295          * 2) Default mb col is current MB : Table 8-8
1296          */
1297 
1298         if (ps_proc->ps_colpu->b1_intra_flag)
1299         {
1300             s_mvcol.i2_mvx = 0;
1301             s_mvcol.i2_mvy = 0;
1302             i4_refidxcol = -1;
1303         }
1304         else
1305         {
1306             if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1307             {
1308                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1309                 i4_refidxcol = 0;
1310             }
1311             else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1312             {
1313                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1314                 i4_refidxcol = 0;
1315             }
1316         }
1317 
1318         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1319         i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1320                         && (ABS(s_mvcol.i2_mvy) <= 1));
1321 
1322     }
1323 
1324     /***************************************************************************
1325      * Evaluating skip params : Spatial Skip
1326      **************************************************************************/
1327     {
1328     /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
1329     ps_a_pu = &ps_proc->s_left_mb_pu_ME;
1330     ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
1331 
1332     i4_c_avail = 0;
1333     if (ps_ngbr_avbl->u1_mb_c)
1334     {
1335         ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
1336         i4_c_avail = 1;
1337     }
1338     else
1339     {
1340         ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
1341         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1342     }
1343 
1344     i4_a = ps_ngbr_avbl->u1_mb_a;
1345     i4_b = ps_ngbr_avbl->u1_mb_b;
1346     i4_c = i4_c_avail;
1347 
1348     /* Init to no mode avail */
1349     i4_mode_avail = 0;
1350     for (i = 0; i < 2; i++)
1351     {
1352         i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1353 
1354         i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1355         i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1356         i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1357     }
1358 
1359     if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
1360     {
1361         i4_skip_type= PRED_BI;
1362     }
1363     else if(i4_mode_avail == 0x1)
1364     {
1365         i4_skip_type = PRED_L0;
1366     }
1367     else if(i4_mode_avail == 0x2)
1368     {
1369         i4_skip_type = PRED_L1;
1370     }
1371 
1372     /* Update skip MV for L0 */
1373     if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1374     {
1375         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1376         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1377     }
1378     else
1379     {
1380         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1381         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1382     }
1383 
1384     /* Update skip MV for L1 */
1385     if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1386     {
1387         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1388         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1389     }
1390     else
1391     {
1392         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1393         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1394     }
1395 
1396     }
1397 
1398     /***************************************************************************
1399      * Evaluating skip params : Temporal skip
1400      **************************************************************************/
1401     {
1402         pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
1403         WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
1404         enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
1405 
1406         ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
1407         ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
1408 
1409         i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1410         i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1411 
1412         i4_tb = CLIP3(-128, 127, i4_tb);
1413         i4_td = CLIP3(-128, 127, i4_td);
1414 
1415         i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
1416         i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
1417 
1418         /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc) operation */
1419         ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
1420         ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
1421 
1422         ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
1423         ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
1424 
1425     }
1426 
1427     return i4_skip_type;
1428 }
1429 
1430 /**
1431 *******************************************************************************
1432 *
1433 * @brief The function computes the skip motion vectoe for B mb
1434 *
1435 * @par Description:
1436 *  The function gives the skip motion vector for B Mb, check if the Mb can be
1437 *  marked as skip
1438 *
1439 * @param[in] ps_proc
1440 *  Pointer to process context
1441 *
1442 * @param[in] i4_reflist
1443 *  Dummy
1444 *
1445 * @returns Flag indicating if the current Mb can be skip or not
1446 *
1447 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1448 *   specification. It also computes co-located MB parmas according to sec 8.4.1.2.1
1449 *
1450 *******************************************************************************/
ih264e_find_bskip_params(process_ctxt_t * ps_proc,WORD32 i4_reflist)1451 WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1452 {
1453     /* Colzero for co-located MB */
1454     WORD32 i4_colzeroflag;
1455 
1456     /* motion vectors */
1457     enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1458 
1459     /* Syntax elem */
1460     mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
1461 
1462     /* Variables to check if a particular mB is available */
1463     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1464 
1465     /* Mode availability, init to no modes available     */
1466     WORD32 i4_mode_avail;
1467 
1468     /*  mb neighbor availability */
1469     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1470 
1471     /* Temp var */
1472     WORD32 i, i4_cmpl_mode;
1473 
1474     UNUSED(i4_reflist);
1475 
1476     /**************************************************************************
1477      * Find co-locates parameters
1478      *      See sec 8.4.1.2.1  for reference
1479      **************************************************************************/
1480     {
1481         /*
1482          * Find the co-located Mb and update the skip and pred appropriately
1483          * 1) Default colpic is forward ref : Table 8-6
1484          * 2) Default mb col is current MB : Table 8-8
1485          */
1486 
1487         mv_t s_mvcol;
1488         WORD32 i4_refidxcol;
1489 
1490         if (ps_proc->ps_colpu->b1_intra_flag)
1491         {
1492             s_mvcol.i2_mvx = 0;
1493             s_mvcol.i2_mvy = 0;
1494             i4_refidxcol = -1;
1495         }
1496         else
1497         {
1498             if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1499             {
1500                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1501                 i4_refidxcol = 0;
1502             }
1503             else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1504             {
1505                 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1506                 i4_refidxcol = 0;
1507             }
1508         }
1509 
1510         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1511         i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1512                         && (ABS(s_mvcol.i2_mvy) <= 1));
1513 
1514     }
1515 
1516     /***************************************************************************
1517      * Evaluating skip params
1518      **************************************************************************/
1519     /* Section 8.4.1.2.2 */
1520     ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
1521     ps_a_pu = &ps_proc->s_left_mb_pu;
1522 
1523     ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1524     ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
1525 
1526     i4_c_avail = 0;
1527     if (ps_ngbr_avbl->u1_mb_c)
1528     {
1529         ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
1530         ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
1531         i4_c_avail = 1;
1532     }
1533     else
1534     {
1535         ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
1536         ps_c_pu = &ps_proc->s_top_left_mb_pu;
1537         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1538     }
1539 
1540 
1541     i4_a = ps_ngbr_avbl->u1_mb_a;
1542     i4_a &= !ps_a_syn->u2_is_intra;
1543 
1544     i4_b = ps_ngbr_avbl->u1_mb_b;
1545     i4_b &= !ps_b_syn->u2_is_intra;
1546 
1547     i4_c = i4_c_avail;
1548     i4_c &= !ps_c_syn->u2_is_intra;
1549 
1550     /* Init to no mode avail */
1551     i4_mode_avail = 0;
1552     for (i = 0; i < 2; i++)
1553     {
1554         i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1555 
1556         i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1557         i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1558         i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1559     }
1560 
1561     /* Update skip MV for L0 */
1562     if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1563     {
1564         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1565         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1566     }
1567     else
1568     {
1569         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1570         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1571     }
1572 
1573     /* Update skip MV for L1 */
1574     if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1575     {
1576         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1577         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1578     }
1579     else
1580     {
1581         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1582         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1583     }
1584 
1585     /* Now see if the ME information matches the SKIP information */
1586     switch (ps_proc->ps_pu->b2_pred_mode)
1587     {
1588         case PRED_BI:
1589             if (  (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1590                && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1591                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1592                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1593                && (i4_mode_avail ==  0x3 || i4_mode_avail == 0x0))
1594             {
1595                 return 1;
1596             }
1597             break;
1598 
1599         case PRED_L0:
1600             if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1601               && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1602               && (i4_mode_avail == 0x1))
1603             {
1604                 return 1;
1605             }
1606             break;
1607 
1608         case PRED_L1:
1609             if (  (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1610                && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1611                && (i4_mode_avail == 0x2))
1612             {
1613                 return 1;
1614             }
1615             break;
1616     }
1617 
1618     return 0;
1619 }
1620 
1621 
1622 /**
1623 *******************************************************************************
1624 *
1625 * @brief This function computes the best motion vector among the tentative mv
1626 * candidates chosen.
1627 *
1628 * @par Description:
1629 *  This function determines the position in the search window at which the motion
1630 *  estimation should begin in order to minimise the number of search iterations.
1631 *
1632 * @param[in] ps_me_ctxt
1633 *  pointer to me context
1634 *
1635 * @param[in] ps_proc
1636 *  process context
1637 *
1638 * @param[in] ps_mb_ctxt_bi
1639 *  pointer to current mb partition ctxt with respect to ME
1640 *
1641 * @returns  mv pair & corresponding distortion and cost
1642 *
1643 * @remarks Currently only 4 search candiates are supported
1644 *
1645 *******************************************************************************
1646 */
ih264e_evaluate_bipred(me_ctxt_t * ps_me_ctxt,process_ctxt_t * ps_proc,mb_part_ctxt * ps_mb_ctxt_bi)1647 void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
1648                             process_ctxt_t *ps_proc,
1649                             mb_part_ctxt *ps_mb_ctxt_bi)
1650 {
1651 
1652     UWORD32 i, u4_fast_sad;
1653 
1654     WORD32 i4_dest_buff;
1655 
1656     mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
1657 
1658     UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
1659 
1660     UWORD8 *pu1_dst_buf;
1661 
1662     WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
1663 
1664     WORD32 i4_mb_distortion, i4_mb_cost;
1665 
1666     u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
1667 
1668     i4_dest_buff = 0;
1669 
1670     for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
1671     {
1672         pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
1673 
1674         s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
1675         s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
1676         s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
1677         s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
1678 
1679         ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
1680         ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
1681 
1682         if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
1683                         (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
1684         {
1685             pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
1686             i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
1687         }
1688         else
1689         {
1690             pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1691             i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
1692         }
1693 
1694 
1695         if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
1696                         (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
1697         {
1698             pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
1699             i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
1700         }
1701         else
1702         {
1703             pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1704             i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
1705         }
1706 
1707         ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
1708                         pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
1709                         i4_ref_l0_stride, i4_ref_l1_stride,
1710                         ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
1711 
1712         ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
1713                         ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
1714                         ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
1715                         INT_MAX, &i4_mb_distortion);
1716 
1717         /* compute cost */
1718         i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
1719         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
1720         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
1721         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
1722 
1723         i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
1724 
1725 
1726         i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
1727         i4_mb_cost += i4_mb_distortion;
1728 
1729         if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
1730         {
1731             ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
1732             ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
1733             ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
1734             ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
1735             i4_dest_buff = (i4_dest_buff + 1) % 2;
1736         }
1737     }
1738 
1739 }
1740 
1741 /**
1742 *******************************************************************************
1743 *
1744 * @brief This function performs motion estimation for the current mb
1745 *
1746 * @par Description:
1747 *  The current mb is compared with a list of mb's in the reference frame for
1748 *  least cost. The mb that offers least cost is chosen as predicted mb and the
1749 *  displacement of the predicted mb from index location of the current mb is
1750 *  signaled as mv. The list of the mb's that are chosen in the reference frame
1751 *  are dependent on the speed of the ME configured.
1752 *
1753 * @param[in] ps_proc
1754 *  Process context corresponding to the job
1755 *
1756 * @returns  motion vector of the pred mb, sad, cost.
1757 *
1758 * @remarks none
1759 *
1760 *******************************************************************************
1761 */
ih264e_compute_me_multi_reflist(process_ctxt_t * ps_proc)1762 void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
1763 {
1764     /* me ctxt */
1765     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1766 
1767     /* codec context */
1768     codec_t *ps_codec = ps_proc->ps_codec;
1769 
1770     /* Temp variables for looping over ref lists */
1771     WORD32 i4_reflist, i4_max_reflist;
1772 
1773     /* recon stride */
1774     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1775 
1776     /* source buffer for halp pel generation functions */
1777     UWORD8 *pu1_hpel_src;
1778 
1779     /* quantization parameters */
1780     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1781 
1782     /* Mb part ctxts for SKIP */
1783     mb_part_ctxt as_skip_mbpart[2];
1784 
1785     /* Sad therholds */
1786     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1787 
1788     {
1789         WORD32 rows_above, rows_below, columns_left, columns_right;
1790 
1791         /* During evaluation for motion vectors do not search through padded regions */
1792         /* Obtain number of rows and columns that are effective for computing for me evaluation */
1793         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1794         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1795         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1796         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1797 
1798         /* init srch range */
1799         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
1800          * on all sides.
1801          */
1802         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1803         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1804         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1805         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1806 
1807         /* this is to facilitate fast sub pel computation with minimal loads */
1808         if (ps_me_ctxt->u4_enable_hpel)
1809         {
1810             ps_me_ctxt->i4_srch_range_w += 1;
1811             ps_me_ctxt->i4_srch_range_e -= 1;
1812             ps_me_ctxt->i4_srch_range_n += 1;
1813             ps_me_ctxt->i4_srch_range_s -= 1;
1814         }
1815     }
1816 
1817     /* Compute ME and store the MVs */
1818     {
1819         /***********************************************************************
1820          * Compute ME for lists L0 and L1
1821          *  For L0 -> L0 skip + L0
1822          *  for L1 -> L0 skip + L0 + L1 skip + L1
1823          ***********************************************************************/
1824         i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
1825 
1826         /* Init SATQD for the current list */
1827         ps_me_ctxt->u4_min_sad_reached  = 0;
1828         ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1829 
1830         for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
1831         {
1832 
1833             /* Get the seed motion vector candidates                    */
1834             ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
1835 
1836             /* ****************************************************************
1837              *Evaluate the SKIP for current list
1838              * ****************************************************************/
1839             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
1840             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
1841             as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
1842             as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
1843 
1844             if (ps_me_ctxt->i4_skip_type == i4_reflist)
1845             {
1846                 ime_compute_skip_cost( ps_me_ctxt,
1847                                        (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
1848                                        &as_skip_mbpart[i4_reflist],
1849                                        ps_proc->ps_codec->s_cfg.u4_enable_satqd,
1850                                        i4_reflist,
1851                                        (ps_proc->i4_slice_type == BSLICE) );
1852             }
1853 
1854             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1855             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1856 
1857             /******************************************************************
1858              * Evaluate ME For current list
1859              *****************************************************************/
1860             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
1861             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
1862             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
1863             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
1864 
1865             /* Init Hpel */
1866             ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
1867 
1868             /* In case we found out the minimum SAD, exit the ME eval */
1869             if (ps_me_ctxt->u4_min_sad_reached)
1870             {
1871                 i4_max_reflist = i4_reflist;
1872                 break;
1873             }
1874 
1875 
1876             /* Evaluate search candidates for initial mv pt */
1877             ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
1878 
1879             /********************************************************************/
1880             /*                  full pel motion estimation                      */
1881             /********************************************************************/
1882             ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1883 
1884             DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2),
1885                                    (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2));
1886 
1887             DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion, 1);
1888 
1889             /* Scale the MV to qpel resolution */
1890             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1891             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1892 
1893             if (ps_me_ctxt->u4_enable_hpel)
1894             {
1895                 /* moving src pointer to the converged motion vector location */
1896                 pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
1897                                + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
1898                                + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
1899 
1900                 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1901                 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1902                 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1903 
1904                 /* Init the search position to an invalid number */
1905                 ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
1906 
1907                 /* Incase a buffer is still in use by L0, replace it with spare buff */
1908                 ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
1909                                 ps_proc->apu1_subpel_buffs[3];
1910 
1911 
1912                 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1913 
1914                 /* half  pel search is done for both sides of full pel,
1915                  * hence half_x of width x height = 17x16 is created
1916                  * starting from left half_x of converged full pel */
1917                 pu1_hpel_src -= 1;
1918 
1919                 /* computing half_x */
1920                 ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1921                                                       ps_me_ctxt->apu1_subpel_buffs[0],
1922                                                       i4_rec_strd,
1923                                                       ps_me_ctxt->u4_subpel_buf_strd);
1924 
1925                 /*
1926                  * Halfpel search is done for both sides of full pel,
1927                  * hence half_y of width x height = 16x17 is created
1928                  * starting from top half_y of converged full pel
1929                  * for half_xy top_left is required
1930                  * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1931                  */
1932                 pu1_hpel_src -= i4_rec_strd;
1933 
1934                 /* computing half_y and half_xy */
1935                 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1936                                 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1937                                 ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1938                                 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1939                                 ps_me_ctxt->u4_subpel_buf_strd);
1940 
1941                 ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1942 
1943             }
1944         }
1945 
1946         /***********************************************************************
1947          * If a particular skiip Mv is giving better sad, copy to the corresponding
1948          * MBPART
1949          * In B slices this loop should go only to PREDL1: If we found min sad
1950          * we will go to the skip ref list only
1951          * Have to find a way to make it without too much change or new vars
1952          **********************************************************************/
1953         for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
1954         {
1955             if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
1956             {
1957                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
1958                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
1959                 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
1960             }
1961         }
1962 
1963         /***********************************************************************
1964          * Compute ME for BI
1965          *  In case of BI we do ME for two candidates
1966          *   1) The best L0 and L1 Mvs
1967          *   2) Skip L0 and L1 MVs
1968          *
1969          *   TODO
1970          *   one of the search candidates is skip. Hence it may be duplicated
1971          ***********************************************************************/
1972         if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
1973         {
1974             WORD32 i, j = 0;
1975             WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
1976             WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
1977 
1978             /* Get the free buffers */
1979             l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
1980             l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
1981 
1982             /* Search for the two free buffers in subpel list */
1983             for (i = 0; i < SUBPEL_BUFF_CNT; i++)
1984             {
1985                 if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
1986                 {
1987                     ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
1988                     j++;
1989                 }
1990             }
1991             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1992 
1993             /* Copy the statial SKIP MV of each list */
1994             i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
1995             i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
1996             ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
1997             ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
1998             ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
1999             ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2000 
2001             /* Copy the SKIP MV temporal of each list */
2002             i4_l0_skip_mv_idx++;
2003             i4_l1_skip_mv_idx++;
2004             ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2005             ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2006             ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2007             ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2008 
2009             /* Copy the best MV after ME */
2010             ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
2011             ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
2012 
2013             ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
2014 
2015             ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
2016             ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
2017 
2018             ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
2019                                    &ps_me_ctxt->as_mb_part[PRED_BI]);
2020 
2021             i4_max_reflist = PRED_BI;
2022         }
2023 
2024         /**********************************************************************
2025          * Now get the minimum of MB part sads by searching over all ref lists
2026          **********************************************************************/
2027         ps_proc->ps_pu->b2_pred_mode = 0x3;
2028 
2029         for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2030         {
2031             if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2032             {
2033                 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2034                 ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2035                 ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2036                 ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
2037             }
2038         }
2039 
2040         /**********************************************************************
2041          * In case we have a BI MB, we have to copy the buffers and set proer MV's
2042          *  1)In case its BI, we need to get the best MVs given by BI and update
2043          *    to their corresponding MB part
2044          *  2)We also need to copy the buffer in which bipred buff is populated
2045          *
2046          *  Not that if we have
2047          **********************************************************************/
2048         if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
2049         {
2050             WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
2051             UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
2052 
2053             ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
2054             ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
2055 
2056             /* Now we have to copy the buffers */
2057             ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
2058                                               ps_proc->pu1_best_subpel_buf,
2059                                               ps_me_ctxt->u4_subpel_buf_strd,
2060                                               ps_proc->u4_bst_spel_buf_strd,
2061                                               MB_SIZE, MB_SIZE, NULL, 0);
2062 
2063         }
2064         else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
2065         {
2066             /* Now we have to copy the buffers */
2067             ps_codec->pf_inter_pred_luma_copy(
2068                             ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
2069                             ps_proc->pu1_best_subpel_buf,
2070                             ps_me_ctxt->u4_subpel_buf_strd,
2071                             ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
2072                             NULL, 0);
2073         }
2074     }
2075 
2076     /**************************************************************************
2077      *Now copy the MVs to the current PU with qpel scaling
2078      ***************************************************************************/
2079     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
2080     ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
2081     ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
2082     ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
2083 
2084 
2085     ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
2086     ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
2087 
2088     /* number of partitions */
2089     ps_proc->u4_num_sub_partitions = 1;
2090     *(ps_proc->pu4_mb_pu_cnt) = 1;
2091 
2092     /* position in-terms of PU */
2093     ps_proc->ps_pu->b4_pos_x = 0;
2094     ps_proc->ps_pu->b4_pos_y = 0;
2095 
2096     /* PU size */
2097     ps_proc->ps_pu->b4_wd = 3;
2098     ps_proc->ps_pu->b4_ht = 3;
2099 
2100     /* Update min sad conditions */
2101     if (ps_me_ctxt->u4_min_sad_reached == 1)
2102     {
2103         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2104         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2105     }
2106 }
2107 
2108