xref: /aosp_15_r20/external/libavc/encoder/ih264e_mc.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_mc.c
25 *
26 * @brief
27 *  Contains definition of functions for motion compensation
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_motion_comp_luma
34 *  - ih264e_motion_comp_chroma
35 *
36 * @remarks
37 *  none
38 *
39 *******************************************************************************
40 */
41 
42 /*****************************************************************************/
43 /* File Includes                                                             */
44 /*****************************************************************************/
45 
46 /* System Include Files */
47 #include <stdio.h>
48 
49 /* User Include Files */
50 #include "ih264_typedefs.h"
51 #include "iv2.h"
52 #include "ive2.h"
53 
54 #include "ih264_defs.h"
55 #include "ih264_mem_fns.h"
56 #include "ih264_padding.h"
57 #include "ih264_structs.h"
58 #include "ih264_trans_quant_itrans_iquant.h"
59 #include "ih264_inter_pred_filters.h"
60 #include "ih264_intra_pred_filters.h"
61 #include "ih264_deblk_edge_filters.h"
62 #include "ih264_cabac_tables.h"
63 
64 #include "ime_defs.h"
65 #include "ime_distortion_metrics.h"
66 #include "ime_structs.h"
67 
68 #include "irc_cntrl_param.h"
69 #include "irc_frame_info_collector.h"
70 
71 #include "ih264e_error.h"
72 #include "ih264e_defs.h"
73 #include "ih264e_rate_control.h"
74 #include "ih264e_bitstream.h"
75 #include "ih264e_cabac_structs.h"
76 #include "ih264e_structs.h"
77 #include "ih264e_mc.h"
78 #include "ih264e_half_pel.h"
79 
80 
81 /*****************************************************************************/
82 /* Function Definitions                                                      */
83 /*****************************************************************************/
84 
85 /**
86 ******************************************************************************
87 *
88 * @brief
89 *  performs motion compensation for a luma mb for the given mv.
90 *
91 * @par Description
92 *  This routine performs motion compensation of an inter mb. When the inter
93 *  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
94 *  to pred buffer. In this case the function returns pointer and stride of the
95 *  ref. buffer and this info is used in place of pred buffer else where.
96 *  In other cases, the pred buffer is populated via copy / filtering + copy
97 *  (q pel cases) and returned.
98 *
99 * @param[in] ps_proc
100 *  pointer to current proc ctxt
101 *
102 * @param[out] pu1_pseudo_pred
103 *  pseudo prediction buffer
104 *
105 * @param[out] u4_pseudo_pred_strd
106 *  pseudo pred buffer stride
107 *
108 * @return  none
109 *
110 * @remarks Assumes half pel buffers for the entire frame are populated.
111 *
112 ******************************************************************************
113 */
ih264e_motion_comp_luma(process_ctxt_t * ps_proc,UWORD8 ** pu1_pseudo_pred,WORD32 * pi4_pseudo_pred_strd)114 void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred,
115                              WORD32 *pi4_pseudo_pred_strd)
116 {
117     /* codec context */
118     codec_t *ps_codec = ps_proc->ps_codec;
119 
120     /* me ctxt */
121     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
122 
123     /* Pointer to the structure having motion vectors, size and position of curr partitions */
124     enc_pu_t *ps_curr_pu;
125 
126     /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */
127     UWORD8 *pu1_ref[4];
128 
129     /* pred buffer ptr */
130     UWORD8 *pu1_pred;
131 
132     /* strides of full pel, half pel x, half pel y, half pel xy reference buffer */
133     WORD32 i4_ref_strd[4];
134 
135     /* pred buffer stride */
136     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
137 
138     /* full pel motion vectors */
139     WORD32 u4_mv_x_full, u4_mv_y_full;
140 
141     /* half pel motion vectors */
142     WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
143 
144     /* quarter pel motion vectors */
145     WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
146 
147     /* width & height of the partition */
148     UWORD32 wd, ht;
149 
150     /* partition idx */
151     UWORD32 u4_num_prtn;
152 
153     /* half / qpel coefficient */
154     UWORD32 u4_subpel_factor;
155 
156     /* BIPRED Flag */
157     WORD32 i4_bipred_flag;
158 
159     /* temp var */
160     UWORD32 u4_lkup_idx1;
161 
162     /* Init */
163     i4_ref_strd[0] = ps_proc->i4_rec_strd;
164 
165     i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] =
166                     ps_me_ctxt->u4_subpel_buf_strd;
167 
168     for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
169          u4_num_prtn++)
170     {
171         mv_t *ps_curr_mv;
172 
173         /* update ptr to curr partition */
174         ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
175 
176         /* Set no no bipred */
177         i4_bipred_flag = 0;
178 
179         switch (ps_curr_pu->b2_pred_mode)
180         {
181             case PRED_L0:
182                 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
183                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
184                 break;
185 
186             case PRED_L1:
187                 ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv;
188                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1];
189                 break;
190 
191             case PRED_BI:
192                 /*
193                  * In case of PRED_BI, we only need to ensure that
194                  * the reference buffer that gets selected is
195                  * ps_proc->pu1_best_subpel_buf
196                  */
197 
198                 /* Dummy */
199                 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
200                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
201 
202                 i4_bipred_flag = 1;
203                 break;
204 
205             default:
206                 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
207                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
208                 break;
209 
210         }
211 
212         /* get full pel mv's (full pel units) */
213         u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
214         u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
215 
216         /* get half pel mv's */
217         u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
218         u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
219 
220         /* get quarter pel mv's */
221         u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
222         u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
223 
224         /* width and height of partition */
225         wd = (ps_curr_pu->b4_wd + 1) << 2;
226         ht = (ps_curr_pu->b4_ht + 1) << 2;
227 
228         /* decision ? qpel/hpel, fpel */
229         u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2)
230                         + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
231 
232         /* Move ref to position given by MV */
233         pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
234 
235         /* Sub pel ptrs/ Biperd pointers init */
236         pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
237         i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
238 
239         /* update pred buff ptr */
240         pu1_pred = ps_proc->pu1_pred_mb
241                         + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
242                         + 4 * ps_curr_pu->b4_pos_x;
243 
244         /* u4_lkup_idx1 will be non zero for half pel and bipred */
245         u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
246 
247         {
248             /********************************************************************/
249             /* if the block is P16x16 MB and mv are not quarter pel motion      */
250             /* vectors, there is no need to copy 16x16 unit from reference frame*/
251             /* to pred buffer. We might as well send the reference frame buffer */
252             /* pointer as pred buffer (ofc with updated stride) to fwd transform*/
253             /* and inverse transform unit.                                      */
254             /********************************************************************/
255             if (ps_proc->u4_num_sub_partitions == 1)
256             {
257                 *pu1_pseudo_pred = pu1_ref[u4_lkup_idx1];
258                 *pi4_pseudo_pred_strd = i4_ref_strd[u4_lkup_idx1];
259 
260             }
261             /*
262              * Copying half pel or full pel to prediction buffer
263              * Currently ps_proc->u4_num_sub_partitions will always be 1 as we
264              * only support 16x16 in P mbs
265              */
266             else
267             {
268                 ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1],
269                                                   pu1_pred,
270                                                   i4_ref_strd[u4_lkup_idx1],
271                                                   i4_pred_strd, ht, wd, NULL,
272                                                   0);
273             }
274         }
275     }
276 }
277 
278 /**
279 ******************************************************************************
280 *
281 * @brief
282 *  performs motion compensation for chroma mb
283 *
284 * @par   Description
285 *  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
286 *  according to the motion vectors given
287 *
288 * @param[in] ps_proc
289 *  pointer to current proc ctxt
290 *
291 * @return  none
292 *
293 * @remarks Assumes half pel and quarter pel buffers for the entire frame are
294 *  populated.
295 ******************************************************************************
296 */
ih264e_motion_comp_chroma(process_ctxt_t * ps_proc)297 void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
298 {
299     /* codec context */
300     codec_t *ps_codec = ps_proc->ps_codec;
301 
302     /* Pointer to the structure having motion vectors, size and position of curr partitions */
303     enc_pu_t *ps_curr_pu;
304 
305     /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */
306     UWORD8 *pu1_ref;
307 
308     /* pred buffer ptr */
309     UWORD8 *pu1_pred;
310 
311     /* strides of full pel reference buffer */
312     WORD32 i4_ref_strd = ps_proc->i4_rec_strd;
313 
314     /* pred buffer stride */
315     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
316 
317     /* full pel motion vectors */
318     WORD32 u4_mv_x_full, u4_mv_y_full;
319 
320     /* half pel motion vectors */
321     WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
322 
323     /* quarter pel motion vectors */
324     WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
325 
326     /* width & height of the partition */
327     UWORD32 wd, ht;
328 
329     /* partition idx */
330     UWORD32 u4_num_prtn;
331 
332     WORD32 u4_mv_x;
333     WORD32 u4_mv_y;
334     UWORD8 u1_dx, u1_dy;
335 
336     for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
337          u4_num_prtn++)
338     {
339         mv_t *ps_curr_mv;
340 
341         ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
342 
343         if (ps_curr_pu->b2_pred_mode != PRED_BI)
344         {
345             ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv;
346             pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode];
347 
348             u4_mv_x = ps_curr_mv->i2_mvx >> 3;
349             u4_mv_y = ps_curr_mv->i2_mvy >> 3;
350 
351             /*  corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed wiith dx, dy =4 */
352             u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
353             u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
354 
355             /* get half pel mv's */
356             u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
357             u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
358 
359             /* get quarter pel mv's */
360             u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
361             u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
362 
363             /* width and height of sub macro block */
364             wd = (ps_curr_pu->b4_wd + 1) << 1;
365             ht = (ps_curr_pu->b4_ht + 1) << 1;
366 
367             /* move the pointers so that they point to the motion compensated locations */
368             pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
369 
370             pu1_pred = ps_proc->pu1_pred_mb
371                             + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
372                             + 2 * ps_curr_pu->b4_pos_x;
373 
374             u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
375             u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
376 
377             /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
378              * separate functions for better performance
379              *
380              * ih264_inter_pred_chroma_dx_zero_a9q
381              * and
382              * ih264_inter_pred_chroma_dy_zero_a9q
383              */
384 
385             ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd,
386                                            i4_pred_strd, u1_dx, u1_dy, ht, wd);
387         }
388         else /* If the pred mode is PRED_BI */
389         {
390             /*
391              * We need to interpolate the L0 and L1 ref pics with the chorma MV
392              * then use them to average for bilinrar interpred
393              */
394             WORD32 i4_predmode;
395             UWORD8 *pu1_ref_buf[2];
396 
397             /* Temporary buffers to store the interpolated value from L0 and L1 */
398             pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0];
399             pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1];
400 
401 
402             for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++)
403             {
404                 ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv;
405                 pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode];
406 
407                 u4_mv_x = ps_curr_mv->i2_mvx >> 3;
408                 u4_mv_y = ps_curr_mv->i2_mvy >> 3;
409 
410                 /*
411                  * corresponds to full pel motion vector in luma, but in chroma
412                  * corresponds to pel formed wiith dx, dy =4
413                  */
414                 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
415                 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
416 
417                 /* get half pel mv's */
418                 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
419                 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
420 
421                 /* get quarter pel mv's */
422                 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
423                 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
424 
425                 /* width and height of sub macro block */
426                 wd = (ps_curr_pu->b4_wd + 1) << 1;
427                 ht = (ps_curr_pu->b4_ht + 1) << 1;
428 
429                 /* move the pointers so that they point to the motion compensated locations */
430                 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
431 
432                 pu1_pred = ps_proc->pu1_pred_mb
433                                 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
434                                 + 2 * ps_curr_pu->b4_pos_x;
435 
436                 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1)
437                                 + (u4_mv_x_qpel);
438                 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1)
439                                 + (u4_mv_y_qpel);
440 
441                 ps_codec->pf_inter_pred_chroma(pu1_ref,
442                                                pu1_ref_buf[i4_predmode],
443                                                i4_ref_strd, MB_SIZE, u1_dx,
444                                                u1_dy, ht, wd);
445             }
446 
447             ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0],
448                                                   pu1_ref_buf[PRED_L1], pu1_pred,
449                                                   MB_SIZE, MB_SIZE,
450                                                   i4_pred_strd, MB_SIZE >> 1,
451                                                   MB_SIZE);
452         }
453     }
454 }
455