1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_me.c
25 *
26 * @brief
27 * Contains definition of functions for motion estimation
28 *
29 * @author
30 * ittiam
31 *
32 * @par List of Functions:
33 * - ih264e_init_mv_bits
34 * - ih264e_get_search_candidates
35 * - ih264e_find_pskip_params
36 * - ih264e_find_pskip_params_me
37 * - ih264e_get_mv_predictor
38 * - ih264e_mv_pred
39 * - ih264e_mv_pred_me
40 * - ih264e_compute_me_single_reflist
41 * - ih264e_compute_me_nmb
42 * - ih264e_find_bskip_params_me
43 * - ih264e_find_bskip_params
44 * - ih264e_evaluate_bipred
45 * - ih264e_compute_me_multi_reflist
46 *
47 * @remarks
48 * none
49 *
50 *******************************************************************************
51 */
52
53 /*****************************************************************************/
54 /* File Includes */
55 /*****************************************************************************/
56
57 /* System Include Files */
58 #include <stdio.h>
59 #include <assert.h>
60 #include <limits.h>
61
62 /* User Include Files */
63 #include "ih264_typedefs.h"
64 #include "iv2.h"
65 #include "ive2.h"
66 #include "ithread.h"
67
68 #include "ih264_debug.h"
69 #include "ih264_macros.h"
70 #include "ih264_defs.h"
71 #include "ih264_mem_fns.h"
72 #include "ih264_padding.h"
73 #include "ih264_structs.h"
74 #include "ih264_trans_quant_itrans_iquant.h"
75 #include "ih264_inter_pred_filters.h"
76 #include "ih264_intra_pred_filters.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ih264_platform_macros.h"
80
81 #include "ime_defs.h"
82 #include "ime_distortion_metrics.h"
83 #include "ime_structs.h"
84 #include "ime.h"
85 #include "ime_statistics.h"
86
87 #include "irc_cntrl_param.h"
88 #include "irc_frame_info_collector.h"
89
90 #include "ih264e_error.h"
91 #include "ih264e_defs.h"
92 #include "ih264e_globals.h"
93 #include "ih264e_rate_control.h"
94 #include "ih264e_bitstream.h"
95 #include "ih264e_cabac_structs.h"
96 #include "ih264e_structs.h"
97 #include "ih264e_mc.h"
98 #include "ih264e_me.h"
99 #include "ih264e_half_pel.h"
100 #include "ih264e_intra_modes_eval.h"
101 #include "ih264e_core_coding.h"
102 #include "ih264e_platform_macros.h"
103
104
105 /*****************************************************************************/
106 /* Function Definitions */
107 /*****************************************************************************/
108
109 /**
110 *******************************************************************************
111 *
112 * @brief
113 * This function populates the length of the codewords for motion vectors in the
114 * range (-search range, search range) in pixels
115 *
116 * @param[in] ps_me
117 * Pointer to me ctxt
118 *
119 * @param[out] pu1_mv_bits
120 * length of the codeword for all mv's
121 *
122 * @remarks The length of the code words are derived from signed exponential
123 * goloumb codes.
124 *
125 *******************************************************************************
126 */
ih264e_init_mv_bits(me_ctxt_t * ps_me_ctxt)127 void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
128 {
129 /* temp var */
130 WORD32 i, codesize = 3, diff, limit;
131 UWORD32 u4_code_num, u4_range;
132 UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
133
134 /* max srch range */
135 diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
136 /* sub pel */
137 diff <<= 2;
138 /* delta mv */
139 diff <<= 1;
140
141 /* codeNum for positive integer = 2x-1 : Table9-3 */
142 u4_code_num = (diff << 1);
143
144 /* get range of the bit string and put using put_bits() */
145 GETRANGE(u4_range, u4_code_num);
146
147 limit = 2*u4_range - 1;
148
149 /* init mv bits */
150 ps_me_ctxt->pu1_mv_bits[0] = 1;
151
152 while (codesize < limit)
153 {
154 u4_uev_min = (1 << (codesize >> 1));
155 u4_uev_max = 2*u4_uev_min - 1;
156
157 u4_sev_min = u4_uev_min >> 1;
158 u4_sev_max = u4_uev_max >> 1;
159
160 DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
161
162 for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
163 {
164 ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
165 }
166
167 codesize += 2;
168 }
169 }
170
171 /**
172 *******************************************************************************
173 *
174 * @brief Determines the valid candidates for which the initial search shall happen.
175 * The best of these candidates is used to center the diamond pixel search.
176 *
177 * @par Description The function sends the skip, (0,0), left, top and top-right
178 * neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
179 * these are the same MVs that are used to form the MV predictor. This initial MV
180 * search candidates need not take care of slice boundaries and hence neighbor
181 * availability checks are not made here.
182 *
183 * @param[in] ps_proc
184 * Pointer to process context
185 *
186 * @param[in] ps_me_ctxt
187 * pointer to me context
188 *
189 * @param[in] i4_ref_list
190 * Current active reference list
191 *
192 * @returns The list of MVs to be used of priming the full pel search and the
193 * number of such MVs
194 *
195 * @remarks
196 * Assumptions : 1. Assumes Only partition of size 16x16
197 *
198 *******************************************************************************
199 */
ih264e_get_search_candidates(process_ctxt_t * ps_proc,me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)200 static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
201 me_ctxt_t *ps_me_ctxt,
202 WORD32 i4_reflist)
203 {
204 /* curr mb indices */
205 WORD32 i4_mb_x = ps_proc->i4_mb_x;
206
207 /* Motion vector */
208 mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
209
210 /* Pred modes */
211 WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
212
213 /* mb part info */
214 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
215
216 /* mvs */
217 WORD32 mvx, mvy;
218
219 /* ngbr availability */
220 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
221
222 /* Current mode */
223 WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
224
225 /* srch range*/
226 WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
227 WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
228 WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
229 WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
230
231 /* num of candidate search candidates */
232 UWORD32 u4_num_candidates = 0;
233
234 ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
235 ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
236 ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
237 ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
238
239 i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
240 i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
241 i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
242 i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
243
244 /* Taking the Zero motion vector as one of the candidates */
245 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
246 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
247
248 u4_num_candidates++;
249
250 /* Taking the Left MV Predictor as one of the candidates */
251 if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
252 {
253 mvx = (ps_left_mv->i2_mvx + 2) >> 2;
254 mvy = (ps_left_mv->i2_mvy + 2) >> 2;
255
256 mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
257 mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
258
259 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
260 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
261
262 u4_num_candidates ++;
263 }
264
265 /* Taking the Top MV Predictor as one of the candidates */
266 if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
267 {
268 mvx = (ps_top_mv->i2_mvx + 2) >> 2;
269 mvy = (ps_top_mv->i2_mvy + 2) >> 2;
270
271 mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
272 mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
273
274 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
275 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
276
277 u4_num_candidates ++;
278
279 /* Taking the TopRt MV Predictor as one of the candidates */
280 if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
281 {
282 mvx = (ps_top_right_mv->i2_mvx + 2) >> 2;
283 mvy = (ps_top_right_mv->i2_mvy + 2)>> 2;
284
285 mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
286 mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
287
288 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
289 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
290
291 u4_num_candidates ++;
292 }
293 /* Taking the TopLt MV Predictor as one of the candidates */
294 else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
295 {
296 mvx = (ps_top_left_mv->i2_mvx + 2) >> 2;
297 mvy = (ps_top_left_mv->i2_mvy + 2) >> 2;
298
299 mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
300 mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
301
302 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
303 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
304
305 u4_num_candidates ++;
306 }
307 }
308
309 /********************************************************************/
310 /* MV Prediction */
311 /********************************************************************/
312 ih264e_mv_pred_me(ps_proc, i4_reflist);
313
314 ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
315 ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
316
317 /* Get the skip motion vector */
318 {
319 ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
320 [ps_proc->i4_slice_type](ps_proc, i4_reflist);
321
322 /* Taking the Skip motion vector as one of the candidates */
323 mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
324 mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
325
326 mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
327 mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
328
329 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
330 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
331 u4_num_candidates++;
332
333 if (ps_proc->i4_slice_type == BSLICE)
334 {
335 /* Taking the temporal Skip motion vector as one of the candidates */
336 mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
337 mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
338
339 mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
340 mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
341
342 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
343 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
344 u4_num_candidates++;
345 }
346 }
347
348 ASSERT(u4_num_candidates <= 6);
349
350 ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
351 }
352
353 /**
354 *******************************************************************************
355 *
356 * @brief The function computes parameters for a PSKIP MB
357 *
358 * @par Description:
359 * The function updates the skip motion vector and checks if the current
360 * MB can be a PSKIP MB or not
361 *
362 * @param[in] ps_proc
363 * Pointer to process context
364 *
365 * @param[in] i4_ref_list
366 * Current active reference list
367 *
368 * @returns Flag indicating if the current MB can be marked as skip
369 *
370 *******************************************************************************
371 */
ih264e_find_pskip_params(process_ctxt_t * ps_proc,WORD32 i4_reflist)372 WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
373 {
374 /* left mb motion vector */
375 enc_pu_t *ps_left_mb_pu ;
376
377 /* top mb motion vector */
378 enc_pu_t *ps_top_mb_pu ;
379
380 /* Skip mv */
381 mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
382
383 UNUSED(i4_reflist);
384
385 ps_left_mb_pu = &ps_proc->s_left_mb_pu;
386 ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
387
388 if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
389 (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
390 (
391 (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
392 (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
393 (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
394 ) ||
395 (
396 (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
397 (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
398 (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
399 )
400 )
401 {
402 ps_skip_mv->i2_mvx = 0;
403 ps_skip_mv->i2_mvy = 0;
404 }
405 else
406 {
407 ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
408 ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
409 }
410
411 if ((ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
412 && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
413 {
414 return 1;
415 }
416
417 return 0;
418 }
419
420 /**
421 *******************************************************************************
422 *
423 * @brief The function computes parameters for a PSKIP MB
424 *
425 * @par Description:
426 * The function updates the skip motion vector and checks if the current
427 * MB can be a PSKIP MB or not
428 *
429 * @param[in] ps_proc
430 * Pointer to process context
431 *
432 * @param[in] i4_ref_list
433 * Current active reference list
434 *
435 * @returns Flag indicating if the current MB can be marked as skip
436 *
437 *******************************************************************************
438 */
ih264e_find_pskip_params_me(process_ctxt_t * ps_proc,WORD32 i4_reflist)439 WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
440 {
441 /* left mb motion vector */
442 enc_pu_t *ps_left_mb_pu ;
443
444 /* top mb motion vector */
445 enc_pu_t *ps_top_mb_pu ;
446
447 /* Skip mv */
448 mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
449
450 UNUSED(i4_reflist);
451
452 ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
453 ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
454
455 if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
456 (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
457 (
458 (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
459 (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
460 (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
461 ) ||
462 (
463 (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
464 (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
465 (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
466 )
467 )
468 {
469 ps_skip_mv->i2_mvx = 0;
470 ps_skip_mv->i2_mvy = 0;
471 }
472 else
473 {
474 ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
475 ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
476 }
477
478 return PRED_L0;
479 }
480
481 /**
482 *******************************************************************************
483 *
484 * @brief motion vector predictor
485 *
486 * @par Description:
487 * The routine calculates the motion vector predictor for a given block,
488 * given the candidate MV predictors.
489 *
490 * @param[in] ps_left_mb_pu
491 * pointer to left mb motion vector info
492 *
493 * @param[in] ps_top_row_pu
494 * pointer to top & top right mb motion vector info
495 *
496 * @param[out] ps_pred_mv
497 * pointer to candidate predictors for the current block
498 *
499 * @param[in] i4_ref_list
500 * Current active reference list
501 *
502 * @returns The x & y components of the MV predictor.
503 *
504 * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
505 * specification.
506 * Assumptions : 1. Assumes Single reference frame
507 * 2. Assumes Only partition of size 16x16
508 *
509 *******************************************************************************
510 */
ih264e_get_mv_predictor(enc_pu_t * ps_left_mb_pu,enc_pu_t * ps_top_row_pu,enc_pu_mv_t * ps_pred_mv,WORD32 i4_ref_list)511 void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
512 enc_pu_t *ps_top_row_pu,
513 enc_pu_mv_t *ps_pred_mv,
514 WORD32 i4_ref_list)
515 {
516 /* Indicated the current ref */
517 WORD8 i1_ref_idx;
518
519 /* For pred L0 */
520 i1_ref_idx = -1;
521 {
522 /* temp var */
523 WORD32 pred_algo = 3, a, b, c;
524
525 /* If only one of the candidate blocks has a reference frame equal to
526 * the current block then use the same block as the final predictor */
527 a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
528 b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
529 c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
530
531 if (a == 0 && b == -1 && c == -1)
532 pred_algo = 0; /* LEFT */
533 else if(a == -1 && b == 0 && c == -1)
534 pred_algo = 1; /* TOP */
535 else if(a == -1 && b == -1 && c == 0)
536 pred_algo = 2; /* TOP RIGHT */
537
538 switch (pred_algo)
539 {
540 case 0:
541 /* left */
542 ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
543 ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
544 break;
545 case 1:
546 /* top */
547 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
548 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
549 break;
550 case 2:
551 /* top right */
552 ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
553 ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
554 break;
555 case 3:
556 /* median */
557 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
558 ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
559 ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
560 ps_pred_mv->s_mv.i2_mvx);
561 MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
562 ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
563 ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
564 ps_pred_mv->s_mv.i2_mvy);
565
566 break;
567 default:
568 break;
569 }
570 }
571 }
572
573 /**
574 *******************************************************************************
575 *
576 * @brief This function performs MV prediction
577 *
578 * @par Description:
579 *
580 * @param[in] ps_proc
581 * Process context corresponding to the job
582 *
583 * @param[in] i4_slice_type
584 * slice type
585 *
586 * @returns none
587 *
588 * @remarks none
589 * This function will update the MB availability since intra inter decision
590 * should be done before the call
591 *
592 *******************************************************************************
593 */
ih264e_mv_pred(process_ctxt_t * ps_proc,WORD32 i4_slice_type)594 void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
595 {
596 /* left mb motion vector */
597 enc_pu_t *ps_left_mb_pu;
598
599 /* top left mb motion vector */
600 enc_pu_t *ps_top_left_mb_pu;
601
602 /* top row motion vector info */
603 enc_pu_t *ps_top_row_pu;
604
605 /* predicted motion vector */
606 enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
607
608 /* zero mv */
609 mv_t zero_mv = { 0, 0 };
610
611 /* mb neighbor availability */
612 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
613
614 /* mb syntax elements of neighbors */
615 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
616 mb_info_t *ps_top_left_syn;
617 UWORD32 u4_left_is_intra;
618
619 /* Temp var */
620 WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
621
622 ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
623 u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
624 ps_left_mb_pu = &ps_proc->s_left_mb_pu;
625 ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
626 ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
627
628 /* Number of ref lists to process */
629 max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
630
631 for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
632 {
633 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
634
635 /* Before performing mv prediction prepare the ngbr information and
636 * reset motion vectors basing on their availability */
637 if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
638 || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
639 {
640 /* left mv */
641 ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
642 ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
643 }
644 if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
645 || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
646 {
647 /* top mv */
648 ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
649 ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
650 }
651
652 if (!ps_ngbr_avbl->u1_mb_c)
653 {
654 /* top right mv - When top right partition is not available for
655 * prediction if top left is available use it for prediction else
656 * set the mv information to -1 and (0, 0)
657 * */
658 if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
659 || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
660 {
661 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
662 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
663 }
664 else
665 {
666 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
667 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
668 }
669 }
670 else if(ps_top_syn[1].u2_is_intra
671 || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
672 {
673 ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
674 ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
675 }
676
677 ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
678 }
679 }
680
681 /**
682 *******************************************************************************
683 *
684 * @brief This function approximates Pred. MV
685 *
686 * @par Description:
687 *
688 * @param[in] ps_proc
689 * Process context corresponding to the job
690 *
691 * @param[in] i4_ref_list
692 * Current active reference list
693 *
694 * @returns none
695 *
696 * @remarks none
697 * Motion estimation happens at nmb level. For cost calculations, mv is appro
698 * ximated using this function
699 *
700 *******************************************************************************
701 */
ih264e_mv_pred_me(process_ctxt_t * ps_proc,WORD32 i4_ref_list)702 void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
703 {
704 /* left mb motion vector */
705 enc_pu_t *ps_left_mb_pu ;
706
707 /* top left mb motion vector */
708 enc_pu_t *ps_top_left_mb_pu ;
709
710 /* top row motion vector info */
711 enc_pu_t *ps_top_row_pu;
712
713 enc_pu_t s_top_row_pu[2];
714
715 /* predicted motion vector */
716 enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
717
718 /* zero mv */
719 mv_t zero_mv = {0, 0};
720
721 /* Complementary pred mode */
722 WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
723
724 /* mb neighbor availability */
725 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
726
727 ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
728 ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
729 ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
730
731 s_top_row_pu[0] = ps_top_row_pu[0];
732 s_top_row_pu[1] = ps_top_row_pu[1];
733
734 /*
735 * Before performing mv prediction prepare the ngbr information and
736 * reset motion vectors basing on their availability
737 */
738 if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
739 {
740 /* left mv */
741 ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
742 ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
743 }
744 if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
745 {
746 /* top mv */
747 s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
748 s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
749
750 }
751 if (!ps_ngbr_avbl->u1_mb_c)
752 {
753 /* top right mv - When top right partition is not available for
754 * prediction if top left is available use it for prediction else
755 * set the mv information to -1 and (0, 0)
756 * */
757 if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
758 {
759 s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
760 s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
761
762 s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
763 s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
764 }
765 else
766 {
767 s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
768 s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
769 }
770 }
771 else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
772 {
773 ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
774 ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
775 }
776
777 ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
778 &ps_pred_mv[i4_ref_list], i4_ref_list);
779 }
780
781 /**
782 *******************************************************************************
783 *
784 * @brief This function initializes me ctxt
785 *
786 * @par Description:
787 * Before dispatching the current job to me thread, the me context associated
788 * with the job is initialized.
789 *
790 * @param[in] ps_proc
791 * Process context corresponding to the job
792 *
793 * @returns none
794 *
795 * @remarks none
796 *
797 *******************************************************************************
798 */
ih264e_init_me(process_ctxt_t * ps_proc)799 void ih264e_init_me(process_ctxt_t *ps_proc)
800 {
801 /* me ctxt */
802 me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
803
804 /* codec context */
805 codec_t *ps_codec = ps_proc->ps_codec;
806
807 ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
808
809 if (ps_codec->s_cfg.u4_num_bframes == 0)
810 {
811 ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
812 }
813 else
814 {
815 ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P;
816 }
817
818 /* src ptr */
819 ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
820
821 /* src stride */
822 ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
823
824 /* ref ptrs and corresponding lagrange params */
825 ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
826 ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
827
828 if (ps_codec->pic_type == PIC_B)
829 {
830 ps_me_ctxt->u4_lambda_motion = gu1_qp_lambdaB[ps_me_ctxt->u1_mb_qp];
831 }
832 else
833 {
834 ps_me_ctxt->u4_lambda_motion = gu1_qp_lambdaIP[ps_me_ctxt->u1_mb_qp];
835 }
836 }
837
838
839 /**
840 *******************************************************************************
841 *
842 * @brief This function performs motion estimation for the current mb using
843 * single reference list
844 *
845 * @par Description:
846 * The current mb is compared with a list of mb's in the reference frame for
847 * least cost. The mb that offers least cost is chosen as predicted mb and the
848 * displacement of the predicted mb from index location of the current mb is
849 * signaled as mv. The list of the mb's that are chosen in the reference frame
850 * are dependent on the speed of the ME configured.
851 *
852 * @param[in] ps_proc
853 * Process context corresponding to the job
854 *
855 * @returns motion vector of the pred mb, sad, cost.
856 *
857 * @remarks none
858 *
859 *******************************************************************************
860 */
ih264e_compute_me_single_reflist(process_ctxt_t * ps_proc)861 void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
862 {
863 /* me ctxt */
864 me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
865
866 /* codec context */
867 codec_t *ps_codec = ps_proc->ps_codec;
868
869 /* recon stride */
870 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
871
872 /* source buffer for halp pel generation functions */
873 UWORD8 *pu1_hpel_src;
874
875 /* quantization parameters */
876 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
877
878 /* Mb part ctxts for SKIP */
879 mb_part_ctxt s_skip_mbpart;
880
881 /* Sad therholds */
882 ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
883
884 {
885 WORD32 rows_above, rows_below, columns_left, columns_right;
886
887 /* During evaluation for motion vectors do not search through padded regions */
888 /* Obtain number of rows and columns that are effective for computing for me evaluation */
889 rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
890 rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
891 columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
892 columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
893
894 /* init srch range */
895 /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
896 * on all sides.
897 */
898 ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
899 ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
900 ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
901 ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
902
903 /* this is to facilitate fast sub pel computation with minimal loads */
904 ps_me_ctxt->i4_srch_range_w += 1;
905 ps_me_ctxt->i4_srch_range_e -= 1;
906 ps_me_ctxt->i4_srch_range_n += 1;
907 ps_me_ctxt->i4_srch_range_s -= 1;
908 }
909
910 /* Compute ME and store the MVs */
911
912 /***********************************************************************
913 * Compute ME for list L0
914 ***********************************************************************/
915
916 /* Init SATQD for the current list */
917 ps_me_ctxt->u4_min_sad_reached = 0;
918 ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
919
920 /* Get the seed motion vector candidates */
921 ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);
922
923 /*****************************************************************
924 * Evaluate the SKIP for current list
925 *****************************************************************/
926 s_skip_mbpart.s_mv_curr.i2_mvx = 0;
927 s_skip_mbpart.s_mv_curr.i2_mvy = 0;
928 s_skip_mbpart.i4_mb_cost = INT_MAX;
929 s_skip_mbpart.i4_mb_distortion = INT_MAX;
930
931 ime_compute_skip_cost( ps_me_ctxt,
932 (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
933 &s_skip_mbpart,
934 ps_proc->ps_codec->s_cfg.u4_enable_satqd,
935 PRED_L0,
936 0 /* Not a Bslice */ );
937
938 s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
939 s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
940
941 /******************************************************************
942 * Evaluate ME For current list
943 *****************************************************************/
944 ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
945 ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
946 ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
947 ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;
948
949 /* Init Hpel */
950 ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;
951
952 /* In case we found out the minimum SAD, exit the ME eval */
953 if (!ps_me_ctxt->u4_min_sad_reached)
954 {
955 /* Evaluate search candidates for initial mv pt */
956 ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);
957
958 /********************************************************************/
959 /* full pel motion estimation */
960 /********************************************************************/
961 ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
962
963 /* Scale the MV to qpel resolution */
964 ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
965 ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;
966
967 if (ps_me_ctxt->u4_enable_hpel)
968 {
969 /* moving src pointer to the converged motion vector location*/
970 pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
971 + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
972 + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2) * i4_rec_strd;
973
974 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
975 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
976 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
977
978 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
979
980 /* half pel search is done for both sides of full pel,
981 * hence half_x of width x height = 17x16 is created
982 * starting from left half_x of converged full pel */
983 pu1_hpel_src -= 1;
984
985 /* computing half_x */
986 ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
987 ps_me_ctxt->apu1_subpel_buffs[0],
988 i4_rec_strd,
989 ps_me_ctxt->u4_subpel_buf_strd);
990
991 /*
992 * Halfpel search is done for both sides of full pel,
993 * hence half_y of width x height = 16x17 is created
994 * starting from top half_y of converged full pel
995 * for half_xy top_left is required
996 * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
997 */
998 pu1_hpel_src -= i4_rec_strd;
999
1000 /* computing half_y , and half_xy*/
1001 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1002 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1003 ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1004 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1005 ps_me_ctxt->u4_subpel_buf_strd);
1006
1007 ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
1008 }
1009 }
1010
1011
    /***********************************************************************
     * If a particular skip Mv is giving better sad, copy to the corresponding
     * MBPART
     * In B slices this loop should go only to PREDL1: If we found min sad
     * we will go to the skip ref list only
     * Have to find a way to make it without too much change or new vars
     **********************************************************************/
1019 if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
1020 {
1021 ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1022 ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1023 ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1024 }
1025 else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
1026 {
1027 /* Now we have to copy the buffers */
1028 ps_codec->pf_inter_pred_luma_copy(
1029 ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
1030 ps_proc->pu1_best_subpel_buf,
1031 ps_me_ctxt->u4_subpel_buf_strd,
1032 ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
1033 NULL, 0);
1034 }
1035
1036 /**********************************************************************
1037 * Now get the minimum of MB part sads by searching over all ref lists
1038 **********************************************************************/
1039 ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
1040 ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
1041 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
1042 ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
1043 ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1044 ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;
1045
1046 /* Mark the reflists */
1047 ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
1048 ps_proc->ps_pu->s_me_info[1].i1_ref_idx = 0;
1049
1050 /* number of partitions */
1051 ps_proc->u4_num_sub_partitions = 1;
1052 *(ps_proc->pu4_mb_pu_cnt) = 1;
1053
1054 /* position in-terms of PU */
1055 ps_proc->ps_pu->b4_pos_x = 0;
1056 ps_proc->ps_pu->b4_pos_y = 0;
1057
1058 /* PU size */
1059 ps_proc->ps_pu->b4_wd = 3;
1060 ps_proc->ps_pu->b4_ht = 3;
1061
1062 /* Update min sad conditions */
1063 if (ps_me_ctxt->u4_min_sad_reached == 1)
1064 {
1065 ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1066 ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1067 }
1068 }
1069
1070 /**
1071 *******************************************************************************
1072 *
1073 * @brief This function performs motion estimation for the current NMB
1074 *
1075 * @par Description:
1076 * Intializes input and output pointers required by the function ih264e_compute_me
1077 * and calls the function ih264e_compute_me in a loop to process NMBs.
1078 *
1079 * @param[in] ps_proc
1080 * Process context corresponding to the job
1081 *
1082 * @param[in] u4_nmb_count
1083 * Number of mb's to process
1084 *
1085 * @returns
1086 *
1087 * @remarks none
1088 *
1089 *******************************************************************************
1090 */
ih264e_compute_me_nmb(process_ctxt_t * ps_proc,UWORD32 u4_nmb_count)1091 void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1092 {
1093 /* pic pu */
1094 enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
1095
1096 /* ME map */
1097 UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1098
1099 /* temp var */
1100 UWORD32 u4_i;
1101
1102 ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
1103 ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
1104
1105 for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1106 {
1107 /* Wait for ME map */
1108 if (ps_proc->i4_mb_y > 0)
1109 {
1110 /* Wait for top right ME to be done */
1111 UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1112
1113 while (1)
1114 {
1115 volatile UWORD8 *pu1_buf;
1116 WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1117
1118 idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1119 pu1_buf = pu1_me_map_tp_rw + idx;
1120 if(*pu1_buf)
1121 break;
1122 ithread_yield();
1123 }
1124 }
1125
1126 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1127 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1128 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1129
1130 ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1131
1132 ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1133 ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1134
1135 ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1136 ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1137
1138 /* Set the best subpel buf to the correct mb so that the buffer can be copied */
1139 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1140 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1141
1142 /* Set the min sad conditions */
1143 ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
1144 ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1145
1146 /* Derive neighbor availability for the current macroblock */
1147 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1148
1149 /* init me */
1150 ih264e_init_me(ps_proc);
1151
1152 /* Compute ME according to slice type */
1153 ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1154
1155 /* update top and left structs */
1156 {
1157 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1158 mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
1159 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
1160 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
1161 enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
1162
1163 *ps_top_left_syn = *ps_top_syn;
1164
1165 *ps_top_left_mb_pu = *ps_top_mv;
1166 *ps_left_mb_pu = *ps_proc->ps_pu;
1167 }
1168
1169 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1170
1171 /* Copy the min sad reached info */
1172 ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1173 ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1174
1175 /*
1176 * To make sure that the MV map is properly sync to the
1177 * cache we need to do a DDB
1178 */
1179 {
1180 DATA_SYNC();
1181
1182 pu1_me_map[ps_proc->i4_mb_x] = 1;
1183 }
1184 ps_proc->i4_mb_x++;
1185
1186 ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1187 ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
1188
1189 /* update buffers pointers */
1190 ps_proc->pu1_src_buf_luma += MB_SIZE;
1191 ps_proc->pu1_rec_buf_luma += MB_SIZE;
1192 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1193 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1194
1195 /*
1196 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1197 * the stride per MB is MB_SIZE
1198 */
1199 ps_proc->pu1_src_buf_chroma += MB_SIZE;
1200 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1201 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1202 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1203
1204
1205 ps_proc->pu4_mb_pu_cnt += 1;
1206 }
1207
1208 ps_proc->ps_pu = ps_pu_begin;
1209 ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1210
1211 /* update buffers pointers */
1212 ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
1213 ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
1214 ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
1215 ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
1216
1217 /*
1218 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1219 * the stride per MB is MB_SIZE
1220 */
1221 ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
1222 ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
1223 ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
1224 ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
1225
1226 ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
1227 }
1228
1229
1230 /**
1231 *******************************************************************************
1232 *
1233 * @brief The function computes parameters for a BSKIP MB
1234 *
1235 * @par Description:
1236 * The function updates the skip motion vector for B Mb, check if the Mb can be
1237 * marked as skip and returns it
1238 *
1239 * @param[in] ps_proc
1240 * Pointer to process context
1241 *
1242 * @param[in] i4_reflist
1243 * Current active reference list
1244 *
1245 * @returns Flag indicating if the current Mb can be skip or not
1246 *
1247 * @remarks
1248 * The code implements the logic as described in sec 8.4.1.2.2
1249 * It also computes co-located MB parmas according to sec 8.4.1.2.1
1250 *
1251 * Need to add condition for this fucntion to be used in ME
1252 *
1253 *******************************************************************************
1254 */
ih264e_find_bskip_params_me(process_ctxt_t * ps_proc,WORD32 i4_reflist)1255 WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1256 {
1257 /* Colzero for co-located MB */
1258 WORD32 i4_colzeroflag;
1259
1260 /* motion vectors for neighbouring MBs */
1261 enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1262
1263 /* Variables to check if a particular mB is available */
1264 WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1265
1266 /* Mode availability, init to no modes available */
1267 WORD32 i4_mode_avail;
1268
1269 /* mb neighbor availability */
1270 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1271
1272 /* Temp var */
1273 WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
1274
1275 /*
1276 * Colocated motion vector
1277 */
1278 mv_t s_mvcol;
1279
1280 /*
1281 * Colocated picture idx
1282 */
1283 WORD32 i4_refidxcol;
1284
1285 UNUSED(i4_reflist);
1286
1287 /**************************************************************************
1288 *Find co-located MB parameters
1289 * See sec 8.4.1.2.1 for reference
1290 **************************************************************************/
1291 {
1292 /*
1293 * Find the co-located Mb and update the skip and pred appropriately
1294 * 1) Default colpic is forward ref : Table 8-6
1295 * 2) Default mb col is current MB : Table 8-8
1296 */
1297
1298 if (ps_proc->ps_colpu->b1_intra_flag)
1299 {
1300 s_mvcol.i2_mvx = 0;
1301 s_mvcol.i2_mvy = 0;
1302 i4_refidxcol = -1;
1303 }
1304 else
1305 {
1306 if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1307 {
1308 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1309 i4_refidxcol = 0;
1310 }
1311 else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1312 {
1313 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1314 i4_refidxcol = 0;
1315 }
1316 }
1317
1318 /* RefPicList1[ 0 ] is marked as "used for short-term reference", as default */
1319 i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1320 && (ABS(s_mvcol.i2_mvy) <= 1));
1321
1322 }
1323
1324 /***************************************************************************
1325 * Evaluating skip params : Spatial Skip
1326 **************************************************************************/
1327 {
1328 /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
1329 ps_a_pu = &ps_proc->s_left_mb_pu_ME;
1330 ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
1331
1332 i4_c_avail = 0;
1333 if (ps_ngbr_avbl->u1_mb_c)
1334 {
1335 ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
1336 i4_c_avail = 1;
1337 }
1338 else
1339 {
1340 ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
1341 i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1342 }
1343
1344 i4_a = ps_ngbr_avbl->u1_mb_a;
1345 i4_b = ps_ngbr_avbl->u1_mb_b;
1346 i4_c = i4_c_avail;
1347
1348 /* Init to no mode avail */
1349 i4_mode_avail = 0;
1350 for (i = 0; i < 2; i++)
1351 {
1352 i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1353
1354 i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1355 i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1356 i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1357 }
1358
1359 if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
1360 {
1361 i4_skip_type= PRED_BI;
1362 }
1363 else if(i4_mode_avail == 0x1)
1364 {
1365 i4_skip_type = PRED_L0;
1366 }
1367 else if(i4_mode_avail == 0x2)
1368 {
1369 i4_skip_type = PRED_L1;
1370 }
1371
1372 /* Update skip MV for L0 */
1373 if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1374 {
1375 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1376 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1377 }
1378 else
1379 {
1380 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1381 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1382 }
1383
1384 /* Update skip MV for L1 */
1385 if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1386 {
1387 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1388 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1389 }
1390 else
1391 {
1392 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1393 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1394 }
1395
1396 }
1397
1398 /***************************************************************************
1399 * Evaluating skip params : Temporal skip
1400 **************************************************************************/
1401 {
1402 pic_buf_t * ps_ref_pic[MAX_REF_PIC_CNT];
1403 WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
1404 enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
1405
1406 ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
1407 ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
1408
1409 i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1410 i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1411
1412 i4_tb = CLIP3(-128, 127, i4_tb);
1413 i4_td = CLIP3(-128, 127, i4_td);
1414
1415 i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
1416 i4_dist_scale_factor = CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
1417
1418 /* Motion vectors taken in full pel resolution , hence -> (& 0xfffc) operation */
1419 ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
1420 ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
1421
1422 ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
1423 ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
1424
1425 }
1426
1427 return i4_skip_type;
1428 }
1429
1430 /**
1431 *******************************************************************************
1432 *
1433 * @brief The function computes the skip motion vectoe for B mb
1434 *
1435 * @par Description:
1436 * The function gives the skip motion vector for B Mb, check if the Mb can be
1437 * marked as skip
1438 *
1439 * @param[in] ps_proc
1440 * Pointer to process context
1441 *
1442 * @param[in] i4_reflist
1443 * Dummy
1444 *
1445 * @returns Flag indicating if the current Mb can be skip or not
1446 *
1447 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1448 * specification. It also computes co-located MB parmas according to sec 8.4.1.2.1
1449 *
1450 *******************************************************************************/
ih264e_find_bskip_params(process_ctxt_t * ps_proc,WORD32 i4_reflist)1451 WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1452 {
1453 /* Colzero for co-located MB */
1454 WORD32 i4_colzeroflag;
1455
1456 /* motion vectors */
1457 enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1458
1459 /* Syntax elem */
1460 mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
1461
1462 /* Variables to check if a particular mB is available */
1463 WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1464
1465 /* Mode availability, init to no modes available */
1466 WORD32 i4_mode_avail;
1467
1468 /* mb neighbor availability */
1469 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1470
1471 /* Temp var */
1472 WORD32 i, i4_cmpl_mode;
1473
1474 UNUSED(i4_reflist);
1475
1476 /**************************************************************************
1477 * Find co-locates parameters
1478 * See sec 8.4.1.2.1 for reference
1479 **************************************************************************/
1480 {
1481 /*
1482 * Find the co-located Mb and update the skip and pred appropriately
1483 * 1) Default colpic is forward ref : Table 8-6
1484 * 2) Default mb col is current MB : Table 8-8
1485 */
1486
1487 mv_t s_mvcol;
1488 WORD32 i4_refidxcol;
1489
1490 if (ps_proc->ps_colpu->b1_intra_flag)
1491 {
1492 s_mvcol.i2_mvx = 0;
1493 s_mvcol.i2_mvy = 0;
1494 i4_refidxcol = -1;
1495 }
1496 else
1497 {
1498 if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1499 {
1500 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1501 i4_refidxcol = 0;
1502 }
1503 else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1504 {
1505 s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1506 i4_refidxcol = 0;
1507 }
1508 }
1509
1510 /* RefPicList1[ 0 ] is marked as "used for short-term reference", as default */
1511 i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1512 && (ABS(s_mvcol.i2_mvy) <= 1));
1513
1514 }
1515
1516 /***************************************************************************
1517 * Evaluating skip params
1518 **************************************************************************/
1519 /* Section 8.4.1.2.2 */
1520 ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
1521 ps_a_pu = &ps_proc->s_left_mb_pu;
1522
1523 ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1524 ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
1525
1526 i4_c_avail = 0;
1527 if (ps_ngbr_avbl->u1_mb_c)
1528 {
1529 ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
1530 ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
1531 i4_c_avail = 1;
1532 }
1533 else
1534 {
1535 ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
1536 ps_c_pu = &ps_proc->s_top_left_mb_pu;
1537 i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1538 }
1539
1540
1541 i4_a = ps_ngbr_avbl->u1_mb_a;
1542 i4_a &= !ps_a_syn->u2_is_intra;
1543
1544 i4_b = ps_ngbr_avbl->u1_mb_b;
1545 i4_b &= !ps_b_syn->u2_is_intra;
1546
1547 i4_c = i4_c_avail;
1548 i4_c &= !ps_c_syn->u2_is_intra;
1549
1550 /* Init to no mode avail */
1551 i4_mode_avail = 0;
1552 for (i = 0; i < 2; i++)
1553 {
1554 i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1555
1556 i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1557 i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1558 i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1559 }
1560
1561 /* Update skip MV for L0 */
1562 if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1563 {
1564 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1565 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1566 }
1567 else
1568 {
1569 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1570 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1571 }
1572
1573 /* Update skip MV for L1 */
1574 if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1575 {
1576 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1577 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1578 }
1579 else
1580 {
1581 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1582 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1583 }
1584
1585 /* Now see if the ME information matches the SKIP information */
1586 switch (ps_proc->ps_pu->b2_pred_mode)
1587 {
1588 case PRED_BI:
1589 if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1590 && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1591 && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1592 && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1593 && (i4_mode_avail == 0x3 || i4_mode_avail == 0x0))
1594 {
1595 return 1;
1596 }
1597 break;
1598
1599 case PRED_L0:
1600 if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1601 && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1602 && (i4_mode_avail == 0x1))
1603 {
1604 return 1;
1605 }
1606 break;
1607
1608 case PRED_L1:
1609 if ( (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1610 && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1611 && (i4_mode_avail == 0x2))
1612 {
1613 return 1;
1614 }
1615 break;
1616 }
1617
1618 return 0;
1619 }
1620
1621
1622 /**
1623 *******************************************************************************
1624 *
1625 * @brief This function computes the best motion vector among the tentative mv
1626 * candidates chosen.
1627 *
1628 * @par Description:
1629 * This function determines the position in the search window at which the motion
1630 * estimation should begin in order to minimise the number of search iterations.
1631 *
1632 * @param[in] ps_me_ctxt
1633 * pointer to me context
1634 *
1635 * @param[in] ps_proc
1636 * process context
1637 *
1638 * @param[in] ps_mb_ctxt_bi
1639 * pointer to current mb partition ctxt with respect to ME
1640 *
1641 * @returns mv pair & corresponding distortion and cost
1642 *
1643 * @remarks Currently only 4 search candiates are supported
1644 *
1645 *******************************************************************************
1646 */
ih264e_evaluate_bipred(me_ctxt_t * ps_me_ctxt,process_ctxt_t * ps_proc,mb_part_ctxt * ps_mb_ctxt_bi)1647 void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
1648 process_ctxt_t *ps_proc,
1649 mb_part_ctxt *ps_mb_ctxt_bi)
1650 {
1651
1652 UWORD32 i, u4_fast_sad;
1653
1654 WORD32 i4_dest_buff;
1655
1656 mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
1657
1658 UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
1659
1660 UWORD8 *pu1_dst_buf;
1661
1662 WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
1663
1664 WORD32 i4_mb_distortion, i4_mb_cost;
1665
1666 u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
1667
1668 i4_dest_buff = 0;
1669
1670 for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
1671 {
1672 pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
1673
1674 s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
1675 s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
1676 s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
1677 s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
1678
1679 ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
1680 ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
1681
1682 if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
1683 (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
1684 {
1685 pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
1686 i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
1687 }
1688 else
1689 {
1690 pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1691 i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
1692 }
1693
1694
1695 if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
1696 (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
1697 {
1698 pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
1699 i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
1700 }
1701 else
1702 {
1703 pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1704 i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
1705 }
1706
1707 ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
1708 pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
1709 i4_ref_l0_stride, i4_ref_l1_stride,
1710 ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
1711
1712 ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
1713 ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
1714 ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
1715 INT_MAX, &i4_mb_distortion);
1716
1717 /* compute cost */
1718 i4_mb_cost = ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
1719 i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
1720 i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
1721 i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
1722
1723 i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
1724
1725
1726 i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
1727 i4_mb_cost += i4_mb_distortion;
1728
1729 if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
1730 {
1731 ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
1732 ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
1733 ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
1734 ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
1735 i4_dest_buff = (i4_dest_buff + 1) % 2;
1736 }
1737 }
1738
1739 }
1740
1741 /**
1742 *******************************************************************************
1743 *
1744 * @brief This function performs motion estimation for the current mb
1745 *
1746 * @par Description:
1747 * The current mb is compared with a list of mb's in the reference frame for
1748 * least cost. The mb that offers least cost is chosen as predicted mb and the
1749 * displacement of the predicted mb from index location of the current mb is
1750 * signaled as mv. The list of the mb's that are chosen in the reference frame
1751 * are dependent on the speed of the ME configured.
1752 *
1753 * @param[in] ps_proc
1754 * Process context corresponding to the job
1755 *
1756 * @returns motion vector of the pred mb, sad, cost.
1757 *
1758 * @remarks none
1759 *
1760 *******************************************************************************
1761 */
ih264e_compute_me_multi_reflist(process_ctxt_t * ps_proc)1762 void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
1763 {
1764 /* me ctxt */
1765 me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1766
1767 /* codec context */
1768 codec_t *ps_codec = ps_proc->ps_codec;
1769
1770 /* Temp variables for looping over ref lists */
1771 WORD32 i4_reflist, i4_max_reflist;
1772
1773 /* recon stride */
1774 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1775
    /* source buffer for half pel generation functions */
1777 UWORD8 *pu1_hpel_src;
1778
1779 /* quantization parameters */
1780 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1781
1782 /* Mb part ctxts for SKIP */
1783 mb_part_ctxt as_skip_mbpart[2];
1784
    /* Sad thresholds */
1786 ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1787
1788 {
1789 WORD32 rows_above, rows_below, columns_left, columns_right;
1790
1791 /* During evaluation for motion vectors do not search through padded regions */
1792 /* Obtain number of rows and columns that are effective for computing for me evaluation */
1793 rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1794 rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1795 columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1796 columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1797
1798 /* init srch range */
1799 /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
1800 * on all sides.
1801 */
1802 ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1803 ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1804 ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1805 ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1806
1807 /* this is to facilitate fast sub pel computation with minimal loads */
1808 if (ps_me_ctxt->u4_enable_hpel)
1809 {
1810 ps_me_ctxt->i4_srch_range_w += 1;
1811 ps_me_ctxt->i4_srch_range_e -= 1;
1812 ps_me_ctxt->i4_srch_range_n += 1;
1813 ps_me_ctxt->i4_srch_range_s -= 1;
1814 }
1815 }
1816
1817 /* Compute ME and store the MVs */
1818 {
1819 /***********************************************************************
1820 * Compute ME for lists L0 and L1
1821 * For L0 -> L0 skip + L0
1822 * for L1 -> L0 skip + L0 + L1 skip + L1
1823 ***********************************************************************/
1824 i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
1825
1826 /* Init SATQD for the current list */
1827 ps_me_ctxt->u4_min_sad_reached = 0;
1828 ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1829
1830 for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
1831 {
1832
1833 /* Get the seed motion vector candidates */
1834 ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
1835
1836 /* ****************************************************************
1837 *Evaluate the SKIP for current list
1838 * ****************************************************************/
1839 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
1840 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
1841 as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
1842 as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
1843
1844 if (ps_me_ctxt->i4_skip_type == i4_reflist)
1845 {
1846 ime_compute_skip_cost( ps_me_ctxt,
1847 (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
1848 &as_skip_mbpart[i4_reflist],
1849 ps_proc->ps_codec->s_cfg.u4_enable_satqd,
1850 i4_reflist,
1851 (ps_proc->i4_slice_type == BSLICE) );
1852 }
1853
1854 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1855 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1856
1857 /******************************************************************
1858 * Evaluate ME For current list
1859 *****************************************************************/
1860 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
1861 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
1862 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
1863 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
1864
1865 /* Init Hpel */
1866 ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
1867
1868 /* In case we found out the minimum SAD, exit the ME eval */
1869 if (ps_me_ctxt->u4_min_sad_reached)
1870 {
1871 i4_max_reflist = i4_reflist;
1872 break;
1873 }
1874
1875
1876 /* Evaluate search candidates for initial mv pt */
1877 ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
1878
1879 /********************************************************************/
1880 /* full pel motion estimation */
1881 /********************************************************************/
1882 ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1883
1884 DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2),
1885 (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2));
1886
1887 DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion, 1);
1888
1889 /* Scale the MV to qpel resolution */
1890 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1891 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1892
1893 if (ps_me_ctxt->u4_enable_hpel)
1894 {
1895 /* moving src pointer to the converged motion vector location */
1896 pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
1897 + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
1898 + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
1899
1900 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1901 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1902 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1903
1904 /* Init the search position to an invalid number */
1905 ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
1906
1907 /* In case a buffer is still in use by L0, replace it with the spare buffer */
1908 ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
1909 ps_proc->apu1_subpel_buffs[3];
1910
1911
1912 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1913
1914 /* half pel search is done for both sides of full pel,
1915 * hence half_x of width x height = 17x16 is created
1916 * starting from left half_x of converged full pel */
1917 pu1_hpel_src -= 1;
1918
1919 /* computing half_x */
1920 ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1921 ps_me_ctxt->apu1_subpel_buffs[0],
1922 i4_rec_strd,
1923 ps_me_ctxt->u4_subpel_buf_strd);
1924
1925 /*
1926 * Halfpel search is done for both sides of full pel,
1927 * hence half_y of width x height = 16x17 is created
1928 * starting from top half_y of converged full pel
1929 * for half_xy top_left is required
1930 * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1931 */
1932 pu1_hpel_src -= i4_rec_strd;
1933
1934 /* computing half_y and half_xy */
1935 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1936 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1937 ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1938 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1939 ps_me_ctxt->u4_subpel_buf_strd);
1940
1941 ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1942
1943 }
1944 }
1945
1946 /***********************************************************************
1947 * If a particular skip MV is giving a better SAD, copy to the corresponding
1948 * MBPART
1949 * In B slices this loop should go only to PREDL1: If we found min sad
1950 * we will go to the skip ref list only
1951 * Have to find a way to make it without too much change or new vars
1952 **********************************************************************/
1953 for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
1954 {
1955 if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
1956 {
1957 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
1958 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
1959 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
1960 }
1961 }
1962
1963 /***********************************************************************
1964 * Compute ME for BI
1965 * In case of BI we do ME for two candidates
1966 * 1) The best L0 and L1 Mvs
1967 * 2) Skip L0 and L1 MVs
1968 *
1969 * TODO
1970 * one of the search candidates is skip. Hence it may be duplicated
1971 ***********************************************************************/
1972 if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
1973 {
1974 WORD32 i, j = 0;
1975 WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
1976 WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
1977
1978 /* Get the free buffers */
1979 l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
1980 l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
1981
1982 /* Search for the two free buffers in subpel list */
1983 for (i = 0; i < SUBPEL_BUFF_CNT; i++)
1984 {
1985 if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
1986 {
1987 ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
1988 j++;
1989 }
1990 }
1991 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1992
1993 /* Copy the spatial SKIP MV of each list */
1994 i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
1995 i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
1996 ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
1997 ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
1998 ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
1999 ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2000
2001 /* Copy the SKIP MV temporal of each list */
2002 i4_l0_skip_mv_idx++;
2003 i4_l1_skip_mv_idx++;
2004 ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2005 ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2006 ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2007 ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2008
2009 /* Copy the best MV after ME */
2010 ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
2011 ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
2012
2013 ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
2014
2015 ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
2016 ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
2017
2018 ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
2019 &ps_me_ctxt->as_mb_part[PRED_BI]);
2020
2021 i4_max_reflist = PRED_BI;
2022 }
2023
2024 /**********************************************************************
2025 * Now get the minimum of MB part sads by searching over all ref lists
2026 **********************************************************************/
2027 ps_proc->ps_pu->b2_pred_mode = 0x3;
2028
2029 for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2030 {
2031 if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2032 {
2033 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2034 ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2035 ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2036 ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
2037 }
2038 }
2039
2040 /**********************************************************************
2041 * In case we have a BI MB, we have to copy the buffers and set proper MVs
2042 * 1)In case its BI, we need to get the best MVs given by BI and update
2043 * to their corresponding MB part
2044 * 2)We also need to copy the buffer in which bipred buff is populated
2045 *
2046 * Not that if we have
2047 **********************************************************************/
2048 if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
2049 {
2050 WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
2051 UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
2052
2053 ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
2054 ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
2055
2056 /* Now we have to copy the buffers */
2057 ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
2058 ps_proc->pu1_best_subpel_buf,
2059 ps_me_ctxt->u4_subpel_buf_strd,
2060 ps_proc->u4_bst_spel_buf_strd,
2061 MB_SIZE, MB_SIZE, NULL, 0);
2062
2063 }
2064 else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
2065 {
2066 /* Now we have to copy the buffers */
2067 ps_codec->pf_inter_pred_luma_copy(
2068 ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
2069 ps_proc->pu1_best_subpel_buf,
2070 ps_me_ctxt->u4_subpel_buf_strd,
2071 ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
2072 NULL, 0);
2073 }
2074 }
2075
2076 /**************************************************************************
2077 *Now copy the MVs to the current PU with qpel scaling
2078 ***************************************************************************/
2079 ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
2080 ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
2081 ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
2082 ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
2083
2084
2085 ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
2086 ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
2087
2088 /* number of partitions */
2089 ps_proc->u4_num_sub_partitions = 1;
2090 *(ps_proc->pu4_mb_pu_cnt) = 1;
2091
2092 /* position in-terms of PU */
2093 ps_proc->ps_pu->b4_pos_x = 0;
2094 ps_proc->ps_pu->b4_pos_y = 0;
2095
2096 /* PU size */
2097 ps_proc->ps_pu->b4_wd = 3;
2098 ps_proc->ps_pu->b4_ht = 3;
2099
2100 /* Update min sad conditions */
2101 if (ps_me_ctxt->u4_min_sad_reached == 1)
2102 {
2103 ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2104 ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2105 }
2106 }
2107
2108