1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar
/*!
******************************************************************************
* \file ihevce_sub_pic_rc.c
*
* \brief
*    This file contains the sub-picture (in-frame) rate control functions:
*    accumulation of CTB level statistics across encode threads and the query
*    of the resulting QP scale
*
* \date
*    18/09/2012
*
* \author
*    Ittiam
*
* List of Functions
*    ihevce_sub_pic_rc_in_data()
*    ihevce_sub_pic_rc_scale_query()
*
******************************************************************************
*/
37*c83a76b0SSuyog Pawar
38*c83a76b0SSuyog Pawar /*****************************************************************************/
39*c83a76b0SSuyog Pawar /* File Includes */
40*c83a76b0SSuyog Pawar /*****************************************************************************/
41*c83a76b0SSuyog Pawar /* System include files */
42*c83a76b0SSuyog Pawar #include <stdio.h>
43*c83a76b0SSuyog Pawar #include <string.h>
44*c83a76b0SSuyog Pawar #include <stdlib.h>
45*c83a76b0SSuyog Pawar #include <assert.h>
46*c83a76b0SSuyog Pawar #include <stdarg.h>
47*c83a76b0SSuyog Pawar #include <math.h>
48*c83a76b0SSuyog Pawar
49*c83a76b0SSuyog Pawar /* User include files */
50*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
51*c83a76b0SSuyog Pawar #include "itt_video_api.h"
52*c83a76b0SSuyog Pawar #include "ihevce_api.h"
53*c83a76b0SSuyog Pawar
54*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
55*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
56*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
57*c83a76b0SSuyog Pawar
58*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
59*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
60*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
61*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
62*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
63*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
64*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
65*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
66*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
67*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
69*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
70*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
71*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
72*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
73*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
74*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
75*c83a76b0SSuyog Pawar
76*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_buffer_que_interface.h"
78*c83a76b0SSuyog Pawar #include "ihevce_hle_interface.h"
79*c83a76b0SSuyog Pawar #include "ihevce_hle_q_func.h"
80*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
82*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
83*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
84*c83a76b0SSuyog Pawar #include "ihevce_error_checks.h"
85*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
86*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
87*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
88*c83a76b0SSuyog Pawar #include "ihevce_trace.h"
89*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
90*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
91*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
92*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
93*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
94*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
95*c83a76b0SSuyog Pawar #include "ihevce_entropy_interface.h"
96*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
97*c83a76b0SSuyog Pawar #include "hme_datatype.h"
98*c83a76b0SSuyog Pawar #include "hme_interface.h"
99*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
100*c83a76b0SSuyog Pawar #include "hme_defs.h"
101*c83a76b0SSuyog Pawar #include "ihevce_rc_enc_structs.h"
102*c83a76b0SSuyog Pawar #include "ihevce_rc_interface.h"
103*c83a76b0SSuyog Pawar #include "ihevce_sub_pic_rc.h"
104*c83a76b0SSuyog Pawar
105*c83a76b0SSuyog Pawar #include "cast_types.h"
106*c83a76b0SSuyog Pawar #include "osal.h"
107*c83a76b0SSuyog Pawar #include "osal_defaults.h"
108*c83a76b0SSuyog Pawar
109*c83a76b0SSuyog Pawar /*****************************************************************************/
110*c83a76b0SSuyog Pawar /* Globals */
111*c83a76b0SSuyog Pawar /*****************************************************************************/
/* @brief : Scale factor mapped to each QP deviation of -6 to +6.          */
/*          Index = deviation + 6; each entry is approximately             */
/*          2^(deviation / 6), so index 6 (no deviation) is exactly 1.0.   */
/*          Not referenced by the functions in this file; kept with        */
/*          external linkage in case other files declare it extern.        */
float qp_scale_dev[13] = { 0.5,   0.56,  0.630, 0.707, 0.794, 0.891, 1.00,
                           1.122, 1.259, 1.414, 1.587, 1.782, 2.00 };
115*c83a76b0SSuyog Pawar
116*c83a76b0SSuyog Pawar /*****************************************************************************/
117*c83a76b0SSuyog Pawar /* Constant Macros */
118*c83a76b0SSuyog Pawar /*****************************************************************************/
/* Switch for in-frame RC debug prints (0 = disabled); not referenced by the */
/* code in this file                                                         */
#define IN_FRAME_RC_PRINT 0
/* Frame number paired with the switch above - presumably the frame to be    */
/* traced; TODO confirm, it is not referenced by the code in this file       */
#define IN_FRAME_RC_FRAME_NUM 4
121*c83a76b0SSuyog Pawar
122*c83a76b0SSuyog Pawar /*****************************************************************************/
123*c83a76b0SSuyog Pawar /* Function Definitions */
124*c83a76b0SSuyog Pawar /*****************************************************************************/
125*c83a76b0SSuyog Pawar
126*c83a76b0SSuyog Pawar /*!
127*c83a76b0SSuyog Pawar ******************************************************************************
128*c83a76b0SSuyog Pawar * \if Function name : ihevce_sub_pic_rc_bits_fill \endif
129*c83a76b0SSuyog Pawar *
130*c83a76b0SSuyog Pawar * \brief
131*c83a76b0SSuyog Pawar * Sub-pic RC thread interface function
132*c83a76b0SSuyog Pawar *
133*c83a76b0SSuyog Pawar * \param[in] Frame process pointer
134*c83a76b0SSuyog Pawar *
135*c83a76b0SSuyog Pawar * \return
136*c83a76b0SSuyog Pawar * None
137*c83a76b0SSuyog Pawar *
138*c83a76b0SSuyog Pawar * \author
139*c83a76b0SSuyog Pawar * Ittiam
140*c83a76b0SSuyog Pawar *
141*c83a76b0SSuyog Pawar *****************************************************************************
142*c83a76b0SSuyog Pawar */
ihevce_sub_pic_rc_in_data(void * pv_multi_thrd_ctxt,void * pv_ctxt,void * pv_ctb_ipe_analyse,void * pv_frm_ctb_prms)143*c83a76b0SSuyog Pawar void ihevce_sub_pic_rc_in_data(
144*c83a76b0SSuyog Pawar void *pv_multi_thrd_ctxt, void *pv_ctxt, void *pv_ctb_ipe_analyse, void *pv_frm_ctb_prms)
145*c83a76b0SSuyog Pawar {
146*c83a76b0SSuyog Pawar multi_thrd_ctxt_t *ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
147*c83a76b0SSuyog Pawar ihevce_enc_loop_ctxt_t *ps_ctxt = (ihevce_enc_loop_ctxt_t *)pv_ctxt;
148*c83a76b0SSuyog Pawar ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse =
149*c83a76b0SSuyog Pawar (ipe_l0_ctb_analyse_for_me_t *)pv_ctb_ipe_analyse;
150*c83a76b0SSuyog Pawar frm_ctb_ctxt_t *ps_frm_ctb_prms = (frm_ctb_ctxt_t *)pv_frm_ctb_prms;
151*c83a76b0SSuyog Pawar
152*c83a76b0SSuyog Pawar WORD32 j = 0;
153*c83a76b0SSuyog Pawar WORD32 i4_frm_id = ps_ctxt->i4_enc_frm_id;
154*c83a76b0SSuyog Pawar WORD32 i4_br_id = ps_ctxt->i4_bitrate_instance_num;
155*c83a76b0SSuyog Pawar WORD32 i4_thrd_id = ps_ctxt->thrd_id;
156*c83a76b0SSuyog Pawar WORD32 i4_ctb_count_flag = 0;
157*c83a76b0SSuyog Pawar WORD32 i4_is_intra_pic = (ISLICE == ps_ctxt->i1_slice_type);
158*c83a76b0SSuyog Pawar
159*c83a76b0SSuyog Pawar /*Accumalate all the variables in shared memory */
160*c83a76b0SSuyog Pawar for(j = 0; j < (MAX_CU_IN_CTB >> 2); j++)
161*c83a76b0SSuyog Pawar {
162*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_ipe_sad[i4_frm_id][i4_br_id][i4_thrd_id] +=
163*c83a76b0SSuyog Pawar ps_ctb_ipe_analyse->ai4_best_sad_8x8_l1_ipe[j];
164*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_me_sad[i4_frm_id][i4_br_id][i4_thrd_id] +=
165*c83a76b0SSuyog Pawar ps_ctb_ipe_analyse->ai4_best_sad_8x8_l1_me[j];
166*c83a76b0SSuyog Pawar
167*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_act_factor[i4_frm_id][i4_br_id][i4_thrd_id] +=
168*c83a76b0SSuyog Pawar ps_ctb_ipe_analyse->ai4_8x8_act_factor[j];
169*c83a76b0SSuyog Pawar }
170*c83a76b0SSuyog Pawar
171*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_l0_ipe_sad[i4_frm_id][i4_br_id][i4_thrd_id] +=
172*c83a76b0SSuyog Pawar ps_ctb_ipe_analyse->i4_ctb_acc_satd;
173*c83a76b0SSuyog Pawar
174*c83a76b0SSuyog Pawar /*Accumalte L0 MPM bits for N CTB*/
175*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_mpm_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
176*c83a76b0SSuyog Pawar ps_ctb_ipe_analyse->i4_ctb_acc_mpm_bits;
177*c83a76b0SSuyog Pawar
178*c83a76b0SSuyog Pawar /*Accumate the total bits and hdr bits for N Ctbs*/
179*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
180*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_bits;
181*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_acc_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
182*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_bits;
183*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_acc_bits_mul_qs_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
184*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_bits_mul_qs;
185*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_hdr_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
186*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_hdr_bits;
187*c83a76b0SSuyog Pawar
188*c83a76b0SSuyog Pawar /*Reset the total CU bits, accumalated for all CTBS*/
189*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_bits = 0;
190*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_hdr_bits = 0;
191*c83a76b0SSuyog Pawar ps_ctxt->u4_total_cu_bits_mul_qs = 0;
192*c83a76b0SSuyog Pawar
193*c83a76b0SSuyog Pawar /*Put mutex lock for incrementing cb count */
194*c83a76b0SSuyog Pawar osal_mutex_lock(ps_multi_thrd_ctxt->pv_sub_pic_rc_mutex_lock_hdl);
195*c83a76b0SSuyog Pawar
196*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_acc_ctb_ctr[i4_frm_id][i4_br_id] += 1;
197*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_ctb_ctr[i4_frm_id][i4_br_id] += 1;
198*c83a76b0SSuyog Pawar
199*c83a76b0SSuyog Pawar /*Check if the acc ctb counter across thread has reached the required threshold */
200*c83a76b0SSuyog Pawar if(ps_multi_thrd_ctxt->ai4_acc_ctb_ctr[i4_frm_id][i4_br_id] >=
201*c83a76b0SSuyog Pawar ps_ctxt->i4_num_ctb_for_out_scale)
202*c83a76b0SSuyog Pawar {
203*c83a76b0SSuyog Pawar i4_ctb_count_flag = 1;
204*c83a76b0SSuyog Pawar /*Reset accumalated CTB counter appropriately s */
205*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_acc_ctb_ctr[i4_frm_id][i4_br_id] = 0;
206*c83a76b0SSuyog Pawar }
207*c83a76b0SSuyog Pawar
208*c83a76b0SSuyog Pawar /*Variables to be sent in the queue after required ctb count is reached */
209*c83a76b0SSuyog Pawar if(1 == i4_ctb_count_flag)
210*c83a76b0SSuyog Pawar {
211*c83a76b0SSuyog Pawar WORD32 i4_temp_thrd_id;
212*c83a76b0SSuyog Pawar LWORD64 i8_nctb_l1_me_sad = 0, i8_nctb_l1_ipe_sad = 0;
213*c83a76b0SSuyog Pawar LWORD64 i8_nctb_l0_ipe_satd = 0, i8_nctb_l1_activity_fact = 0;
214*c83a76b0SSuyog Pawar LWORD64 i8_nctb_hdr_bits_consumed = 0, i8_nctb_l0_mpm_bits = 0;
215*c83a76b0SSuyog Pawar LWORD64 i8_nctb_bits_consumed = 0, i8_acc_bits_consumed = 0,
216*c83a76b0SSuyog Pawar i8_acc_bits_mul_qs_consumed = 0;
217*c83a76b0SSuyog Pawar LWORD64 i8_frame_l1_ipe_sad, i8_frame_l0_ipe_satd, i8_frame_l1_me_sad;
218*c83a76b0SSuyog Pawar LWORD64 i8_frame_l1_activity_fact, i8_frame_bits_estimated;
219*c83a76b0SSuyog Pawar
220*c83a76b0SSuyog Pawar for(i4_temp_thrd_id = 0; i4_temp_thrd_id < ps_ctxt->i4_num_proc_thrds; i4_temp_thrd_id++)
221*c83a76b0SSuyog Pawar {
222*c83a76b0SSuyog Pawar /*Accumalte only if thread id is valid */
223*c83a76b0SSuyog Pawar if(ps_multi_thrd_ctxt->ai4_thrd_id_valid_flag[i4_frm_id][i4_br_id][i4_temp_thrd_id] ==
224*c83a76b0SSuyog Pawar 1)
225*c83a76b0SSuyog Pawar {
226*c83a76b0SSuyog Pawar /*store complexities for the ctbs across all threads till then */
227*c83a76b0SSuyog Pawar i8_nctb_l1_me_sad +=
228*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_me_sad[i4_frm_id][i4_br_id][i4_temp_thrd_id];
229*c83a76b0SSuyog Pawar i8_nctb_l1_ipe_sad +=
230*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_ipe_sad[i4_frm_id][i4_br_id][i4_temp_thrd_id];
231*c83a76b0SSuyog Pawar i8_nctb_l0_ipe_satd +=
232*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_l0_ipe_sad[i4_frm_id][i4_br_id][i4_temp_thrd_id];
233*c83a76b0SSuyog Pawar i8_nctb_l1_activity_fact +=
234*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_act_factor[i4_frm_id][i4_br_id][i4_temp_thrd_id];
235*c83a76b0SSuyog Pawar
236*c83a76b0SSuyog Pawar /*Set encoder total and hdr bits and mpm bits for that N ctbs */
237*c83a76b0SSuyog Pawar i8_nctb_hdr_bits_consumed +=
238*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
239*c83a76b0SSuyog Pawar ->ai8_nctb_hdr_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
240*c83a76b0SSuyog Pawar i8_nctb_l0_mpm_bits +=
241*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
242*c83a76b0SSuyog Pawar ->ai8_nctb_mpm_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
243*c83a76b0SSuyog Pawar i8_nctb_bits_consumed +=
244*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
245*c83a76b0SSuyog Pawar
246*c83a76b0SSuyog Pawar /*Set encoder total bits for ctbs till then */
247*c83a76b0SSuyog Pawar i8_acc_bits_consumed +=
248*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_acc_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
249*c83a76b0SSuyog Pawar i8_acc_bits_mul_qs_consumed +=
250*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
251*c83a76b0SSuyog Pawar ->ai8_acc_bits_mul_qs_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
252*c83a76b0SSuyog Pawar
253*c83a76b0SSuyog Pawar /*Reset NCTB total and hdr, mpm bits counter to zero */
254*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai8_nctb_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id] =
255*c83a76b0SSuyog Pawar 0;
256*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
257*c83a76b0SSuyog Pawar ->ai8_nctb_hdr_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id] = 0;
258*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
259*c83a76b0SSuyog Pawar ->ai8_nctb_mpm_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id] = 0;
260*c83a76b0SSuyog Pawar }
261*c83a76b0SSuyog Pawar }
262*c83a76b0SSuyog Pawar
263*c83a76b0SSuyog Pawar /*Store all frame level params */
264*c83a76b0SSuyog Pawar i8_frame_l1_ipe_sad = ps_ctxt->i8_frame_l1_ipe_sad;
265*c83a76b0SSuyog Pawar i8_frame_l0_ipe_satd = ps_ctxt->i8_frame_l0_ipe_satd;
266*c83a76b0SSuyog Pawar i8_frame_l1_me_sad = ps_ctxt->i8_frame_l1_me_sad;
267*c83a76b0SSuyog Pawar i8_frame_l1_activity_fact = ps_ctxt->i8_frame_l1_activity_fact;
268*c83a76b0SSuyog Pawar i8_frame_bits_estimated = ps_ctxt->ai4_frame_bits_estimated[i4_frm_id][i4_br_id];
269*c83a76b0SSuyog Pawar
270*c83a76b0SSuyog Pawar /*If CU level RC is disabled reset the nctb and frame level factor */
271*c83a76b0SSuyog Pawar if(0 == ps_ctxt->i4_qp_mod)
272*c83a76b0SSuyog Pawar {
273*c83a76b0SSuyog Pawar i8_frame_l1_activity_fact = 0;
274*c83a76b0SSuyog Pawar }
275*c83a76b0SSuyog Pawar
276*c83a76b0SSuyog Pawar ASSERT(ps_ctxt->ai4_frame_bits_estimated[i4_frm_id][i4_br_id] != 0);
277*c83a76b0SSuyog Pawar
278*c83a76b0SSuyog Pawar {
279*c83a76b0SSuyog Pawar float bits_estimated, activity_ratio = 1;
280*c83a76b0SSuyog Pawar WORD32 i8_ctb_bits_estimated;
281*c83a76b0SSuyog Pawar float f_bit_deviation;
282*c83a76b0SSuyog Pawar WORD32 i4_tot_frame_ctb =
283*c83a76b0SSuyog Pawar ps_frm_ctb_prms->i4_num_ctbs_vert * ps_frm_ctb_prms->i4_num_ctbs_horz;
284*c83a76b0SSuyog Pawar
285*c83a76b0SSuyog Pawar /*The QP limit can only increment/decrement by 3/1 */
286*c83a76b0SSuyog Pawar float f_qp_increase_limit = (1.414);
287*c83a76b0SSuyog Pawar //float f_qp_decrease_limit = (0.891);
288*c83a76b0SSuyog Pawar
289*c83a76b0SSuyog Pawar /*Frame level activity is set to 0 for cu-level rc off*/
290*c83a76b0SSuyog Pawar if(i8_frame_l1_activity_fact != 0)
291*c83a76b0SSuyog Pawar activity_ratio =
292*c83a76b0SSuyog Pawar (float)(i8_frame_l1_activity_fact) / (float)(i8_nctb_l1_activity_fact);
293*c83a76b0SSuyog Pawar
294*c83a76b0SSuyog Pawar activity_ratio = 1;
295*c83a76b0SSuyog Pawar
296*c83a76b0SSuyog Pawar /*Estimate the bits to be consumed based on the intra and inter complexity */
297*c83a76b0SSuyog Pawar if(i4_is_intra_pic)
298*c83a76b0SSuyog Pawar {
299*c83a76b0SSuyog Pawar float sad_ratio = (float)(i8_nctb_l0_ipe_satd) / (float)(i8_frame_l0_ipe_satd);
300*c83a76b0SSuyog Pawar bits_estimated = sad_ratio * activity_ratio * ((float)i8_frame_bits_estimated);
301*c83a76b0SSuyog Pawar }
302*c83a76b0SSuyog Pawar else
303*c83a76b0SSuyog Pawar {
304*c83a76b0SSuyog Pawar float sad_ratio = (float)(i8_nctb_l1_me_sad) / (float)(i8_frame_l1_me_sad);
305*c83a76b0SSuyog Pawar bits_estimated = sad_ratio * activity_ratio * ((float)i8_frame_bits_estimated);
306*c83a76b0SSuyog Pawar }
307*c83a76b0SSuyog Pawar
308*c83a76b0SSuyog Pawar i8_ctb_bits_estimated = (i8_frame_bits_estimated / i4_tot_frame_ctb);
309*c83a76b0SSuyog Pawar
310*c83a76b0SSuyog Pawar /*for better control on both sides*/
311*c83a76b0SSuyog Pawar f_bit_deviation = ((i8_acc_bits_consumed * 1.0) / bits_estimated);
312*c83a76b0SSuyog Pawar //printf("\n dev = %f\t",f_bit_deviation);
313*c83a76b0SSuyog Pawar /* if consumed bits is higher than 7.5% or consumed bits is lower by 20%)*/
314*c83a76b0SSuyog Pawar if((f_bit_deviation > 1.075) ||
315*c83a76b0SSuyog Pawar ((f_bit_deviation < 0.8) &&
316*c83a76b0SSuyog Pawar (ps_ctxt->i4_is_model_valid == 0 ||
317*c83a76b0SSuyog Pawar (ps_multi_thrd_ctxt->ai4_threshold_reached[i4_frm_id][i4_br_id]))))
318*c83a76b0SSuyog Pawar {
319*c83a76b0SSuyog Pawar float f_qscale_avg_factor;
320*c83a76b0SSuyog Pawar WORD32 i4_cu_qp_sub_pic_rc_curr;
321*c83a76b0SSuyog Pawar /*get the Qscale of Frame QP*/
322*c83a76b0SSuyog Pawar WORD32 i4_frm_qs_q3 =
323*c83a76b0SSuyog Pawar (ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
324*c83a76b0SSuyog Pawar [ps_ctxt->i4_frame_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
325*c83a76b0SSuyog Pawar WORD32 i4_prev_qp = ps_ctxt->i4_frame_mod_qp;
326*c83a76b0SSuyog Pawar
327*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_threshold_reached[i4_frm_id][i4_br_id] = 1;
328*c83a76b0SSuyog Pawar
329*c83a76b0SSuyog Pawar /*Calculating Intra scale factor */
330*c83a76b0SSuyog Pawar if(i4_is_intra_pic)
331*c83a76b0SSuyog Pawar {
332*c83a76b0SSuyog Pawar /*In case of lower QP, Qscale increase at every step is very low, which doesn't allow QP increase
333*c83a76b0SSuyog Pawar to meet the rate, hence disable deviation clip below QP 4 for all bitdepth*/
334*c83a76b0SSuyog Pawar if(i4_prev_qp > MIN_QP_NO_CLIP_DEV)
335*c83a76b0SSuyog Pawar {
336*c83a76b0SSuyog Pawar /* Clip the bits deviation such that it never cross +3 qp shifts from average QP so far coded with in-frame rc*/
337*c83a76b0SSuyog Pawar if(f_bit_deviation > f_qp_increase_limit)
338*c83a76b0SSuyog Pawar {
339*c83a76b0SSuyog Pawar f_bit_deviation = f_qp_increase_limit;
340*c83a76b0SSuyog Pawar }
341*c83a76b0SSuyog Pawar }
342*c83a76b0SSuyog Pawar
343*c83a76b0SSuyog Pawar /*The current qscale should do not deviate +/- 3 QP from the previous qscale */
344*c83a76b0SSuyog Pawar f_qscale_avg_factor =
345*c83a76b0SSuyog Pawar (((float)(i8_acc_bits_mul_qs_consumed * (1 << QSCALE_Q_FAC_3))) /
346*c83a76b0SSuyog Pawar (i8_acc_bits_consumed * i4_frm_qs_q3));
347*c83a76b0SSuyog Pawar i4_cu_qp_sub_pic_rc_curr =
348*c83a76b0SSuyog Pawar f_qscale_avg_factor * f_bit_deviation * (1 << QP_LEVEL_MOD_ACT_FACTOR);
349*c83a76b0SSuyog Pawar }
350*c83a76b0SSuyog Pawar else /*Calculating Inter scale factor */
351*c83a76b0SSuyog Pawar {
352*c83a76b0SSuyog Pawar /*In case of lower QP, Qscale increase at every step is very low, which doesn't allow QP increase
353*c83a76b0SSuyog Pawar to meet the rate, hence disable deviation clip below QP 4 for all bitdepth*/
354*c83a76b0SSuyog Pawar if(i4_prev_qp > MIN_QP_NO_CLIP_DEV)
355*c83a76b0SSuyog Pawar {
356*c83a76b0SSuyog Pawar /* Clip the bits deviation such that it never cross +3 qp shifts from average QP so far coded with in-frame rc*/
357*c83a76b0SSuyog Pawar if(f_bit_deviation > f_qp_increase_limit)
358*c83a76b0SSuyog Pawar {
359*c83a76b0SSuyog Pawar f_bit_deviation = f_qp_increase_limit;
360*c83a76b0SSuyog Pawar }
361*c83a76b0SSuyog Pawar }
362*c83a76b0SSuyog Pawar
363*c83a76b0SSuyog Pawar /*The current qscale should do not deviate +/- 3 QP from the previous qscale */
364*c83a76b0SSuyog Pawar f_qscale_avg_factor =
365*c83a76b0SSuyog Pawar (((float)(i8_acc_bits_mul_qs_consumed * (1 << QSCALE_Q_FAC_3))) /
366*c83a76b0SSuyog Pawar (i8_acc_bits_consumed * i4_frm_qs_q3));
367*c83a76b0SSuyog Pawar i4_cu_qp_sub_pic_rc_curr =
368*c83a76b0SSuyog Pawar f_qscale_avg_factor * f_bit_deviation * (1 << QP_LEVEL_MOD_ACT_FACTOR);
369*c83a76b0SSuyog Pawar }
370*c83a76b0SSuyog Pawar //printf("Avg_qscale = %f\t qs_inq3 = %d",f_qscale_avg_factor,i4_frm_qs_q3);
371*c83a76b0SSuyog Pawar /*update of previous chunk QP in multi-thread context, so that all threads can use it from now onwards*/
372*c83a76b0SSuyog Pawar {
373*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_prev_chunk_qp[i4_frm_id][i4_br_id] =
374*c83a76b0SSuyog Pawar ps_ctxt->i4_frame_mod_qp;
375*c83a76b0SSuyog Pawar }
376*c83a76b0SSuyog Pawar /*Limit the qp from decreasing less than 6 compared to frame qp */
377*c83a76b0SSuyog Pawar {
378*c83a76b0SSuyog Pawar osal_mutex_lock(ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
379*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_curr_qp_estimated[i4_frm_id][i4_br_id] =
380*c83a76b0SSuyog Pawar i4_cu_qp_sub_pic_rc_curr;
381*c83a76b0SSuyog Pawar osal_mutex_unlock(
382*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
383*c83a76b0SSuyog Pawar }
384*c83a76b0SSuyog Pawar }
385*c83a76b0SSuyog Pawar }
386*c83a76b0SSuyog Pawar }
387*c83a76b0SSuyog Pawar osal_mutex_unlock(ps_multi_thrd_ctxt->pv_sub_pic_rc_mutex_lock_hdl);
388*c83a76b0SSuyog Pawar return;
389*c83a76b0SSuyog Pawar }
390*c83a76b0SSuyog Pawar
391*c83a76b0SSuyog Pawar /*!
392*c83a76b0SSuyog Pawar ******************************************************************************
393*c83a76b0SSuyog Pawar * \if Function name : ihevce_sub_pic_rc_qp_query \endif
394*c83a76b0SSuyog Pawar *
395*c83a76b0SSuyog Pawar * \brief
396*c83a76b0SSuyog Pawar * Sub-pic RC thread interface function
397*c83a76b0SSuyog Pawar *
398*c83a76b0SSuyog Pawar * \param[in] Frame process pointer
399*c83a76b0SSuyog Pawar *
400*c83a76b0SSuyog Pawar * \return
401*c83a76b0SSuyog Pawar * None
402*c83a76b0SSuyog Pawar *
403*c83a76b0SSuyog Pawar * \author
404*c83a76b0SSuyog Pawar * Ittiam
405*c83a76b0SSuyog Pawar *
406*c83a76b0SSuyog Pawar *****************************************************************************
407*c83a76b0SSuyog Pawar */
ihevce_sub_pic_rc_scale_query(void * pv_multi_thrd_ctxt,void * pv_ctxt)408*c83a76b0SSuyog Pawar void ihevce_sub_pic_rc_scale_query(void *pv_multi_thrd_ctxt, void *pv_ctxt)
409*c83a76b0SSuyog Pawar {
410*c83a76b0SSuyog Pawar multi_thrd_ctxt_t *ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
411*c83a76b0SSuyog Pawar ihevce_enc_loop_ctxt_t *ps_ctxt = (ihevce_enc_loop_ctxt_t *)pv_ctxt;
412*c83a76b0SSuyog Pawar WORD32 i4_mod_qp, i4_prev_qs;
413*c83a76b0SSuyog Pawar WORD32 i4_previous_chunk_qp;
414*c83a76b0SSuyog Pawar
415*c83a76b0SSuyog Pawar WORD32 i4_qp_delata_max_limit, i4_qp_delata_min_limit;
416*c83a76b0SSuyog Pawar
417*c83a76b0SSuyog Pawar osal_mutex_lock(ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
418*c83a76b0SSuyog Pawar
419*c83a76b0SSuyog Pawar i4_mod_qp =
420*c83a76b0SSuyog Pawar (ps_ctxt->ps_rc_quant_ctxt
421*c83a76b0SSuyog Pawar ->pi4_qp_to_qscale[ps_ctxt->i4_frame_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
422*c83a76b0SSuyog Pawar i4_previous_chunk_qp =
423*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
424*c83a76b0SSuyog Pawar ->ai4_prev_chunk_qp[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num];
425*c83a76b0SSuyog Pawar i4_prev_qs =
426*c83a76b0SSuyog Pawar (ps_ctxt->ps_rc_quant_ctxt
427*c83a76b0SSuyog Pawar ->pi4_qp_to_qscale[i4_previous_chunk_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
428*c83a76b0SSuyog Pawar /*Limit the qp_delta_scale if it exceeds the limit of QP51 and QP 1 */
429*c83a76b0SSuyog Pawar
430*c83a76b0SSuyog Pawar i4_qp_delata_max_limit =
431*c83a76b0SSuyog Pawar ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale * (1 << QP_LEVEL_MOD_ACT_FACTOR);
432*c83a76b0SSuyog Pawar i4_qp_delata_max_limit = i4_qp_delata_max_limit / i4_mod_qp;
433*c83a76b0SSuyog Pawar
434*c83a76b0SSuyog Pawar i4_qp_delata_min_limit =
435*c83a76b0SSuyog Pawar ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale * (1 << QP_LEVEL_MOD_ACT_FACTOR);
436*c83a76b0SSuyog Pawar i4_qp_delata_min_limit = i4_qp_delata_min_limit / i4_mod_qp;
437*c83a76b0SSuyog Pawar {
438*c83a76b0SSuyog Pawar /*For Non-I SCD and Frames after SCD*/
439*c83a76b0SSuyog Pawar /*The scale is tweeked to only increase qp (increased by 6) if the bits consumed is higher than bits
440*c83a76b0SSuyog Pawar estimated */
441*c83a76b0SSuyog Pawar ps_ctxt->i4_cu_qp_sub_pic_rc =
442*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
443*c83a76b0SSuyog Pawar ->ai4_curr_qp_estimated[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num];
444*c83a76b0SSuyog Pawar /*Limit the Qscale */
445*c83a76b0SSuyog Pawar if(ps_ctxt->i4_cu_qp_sub_pic_rc > i4_qp_delata_max_limit)
446*c83a76b0SSuyog Pawar {
447*c83a76b0SSuyog Pawar ps_ctxt->i4_cu_qp_sub_pic_rc = i4_qp_delata_max_limit;
448*c83a76b0SSuyog Pawar }
449*c83a76b0SSuyog Pawar else if(ps_ctxt->i4_cu_qp_sub_pic_rc < i4_qp_delata_min_limit)
450*c83a76b0SSuyog Pawar {
451*c83a76b0SSuyog Pawar ps_ctxt->i4_cu_qp_sub_pic_rc = i4_qp_delata_min_limit;
452*c83a76b0SSuyog Pawar }
453*c83a76b0SSuyog Pawar
454*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt
455*c83a76b0SSuyog Pawar ->ai4_curr_qp_estimated[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num] =
456*c83a76b0SSuyog Pawar ps_ctxt->i4_cu_qp_sub_pic_rc;
457*c83a76b0SSuyog Pawar }
458*c83a76b0SSuyog Pawar
459*c83a76b0SSuyog Pawar /*Accumalate the CTB level QP here and feed to rc as average qp*/
460*c83a76b0SSuyog Pawar {
461*c83a76b0SSuyog Pawar WORD32 i4_mod_cur_qp, i4_mod_prev_qp;
462*c83a76b0SSuyog Pawar
463*c83a76b0SSuyog Pawar i4_mod_cur_qp =
464*c83a76b0SSuyog Pawar ((i4_mod_qp * ps_ctxt->i4_cu_qp_sub_pic_rc) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
465*c83a76b0SSuyog Pawar QP_LEVEL_MOD_ACT_FACTOR;
466*c83a76b0SSuyog Pawar
467*c83a76b0SSuyog Pawar /*Limit the qscale and qp */
468*c83a76b0SSuyog Pawar if(i4_mod_cur_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
469*c83a76b0SSuyog Pawar {
470*c83a76b0SSuyog Pawar i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
471*c83a76b0SSuyog Pawar ASSERT(0);
472*c83a76b0SSuyog Pawar }
473*c83a76b0SSuyog Pawar else if(i4_mod_cur_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
474*c83a76b0SSuyog Pawar {
475*c83a76b0SSuyog Pawar i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
476*c83a76b0SSuyog Pawar ASSERT(0);
477*c83a76b0SSuyog Pawar }
478*c83a76b0SSuyog Pawar
479*c83a76b0SSuyog Pawar i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_mod_cur_qp];
480*c83a76b0SSuyog Pawar /*limit the prev qs*/
481*c83a76b0SSuyog Pawar if(i4_prev_qs > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
482*c83a76b0SSuyog Pawar {
483*c83a76b0SSuyog Pawar i4_prev_qs = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
484*c83a76b0SSuyog Pawar }
485*c83a76b0SSuyog Pawar else if(i4_prev_qs < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
486*c83a76b0SSuyog Pawar {
487*c83a76b0SSuyog Pawar i4_prev_qs = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
488*c83a76b0SSuyog Pawar }
489*c83a76b0SSuyog Pawar
490*c83a76b0SSuyog Pawar i4_mod_prev_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_prev_qs];
491*c83a76b0SSuyog Pawar
492*c83a76b0SSuyog Pawar /*cur qp < prev qp, then allow only -1*/
493*c83a76b0SSuyog Pawar if(i4_mod_cur_qp < i4_mod_prev_qp)
494*c83a76b0SSuyog Pawar {
495*c83a76b0SSuyog Pawar i4_mod_cur_qp = i4_mod_prev_qp - 1;
496*c83a76b0SSuyog Pawar if(i4_mod_cur_qp < (ps_ctxt->i4_frame_qp - 6))
497*c83a76b0SSuyog Pawar {
498*c83a76b0SSuyog Pawar i4_mod_cur_qp = (ps_ctxt->i4_frame_qp - 6);
499*c83a76b0SSuyog Pawar }
500*c83a76b0SSuyog Pawar }
501*c83a76b0SSuyog Pawar
502*c83a76b0SSuyog Pawar /*In case of lower QP, Qscale increase at every step is very low, which doesn't allow QP increase
503*c83a76b0SSuyog Pawar to meet the rate, hence disable deviation clip below QP 4 for all bitdepth*/
504*c83a76b0SSuyog Pawar if(i4_mod_cur_qp > i4_mod_prev_qp)
505*c83a76b0SSuyog Pawar {
506*c83a76b0SSuyog Pawar i4_mod_cur_qp = MIN(i4_mod_prev_qp + 3, i4_mod_cur_qp);
507*c83a76b0SSuyog Pawar }
508*c83a76b0SSuyog Pawar
509*c83a76b0SSuyog Pawar /* CLIP to maintain Qp between user configured and min and max Qp values*/
510*c83a76b0SSuyog Pawar if(i4_mod_cur_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
511*c83a76b0SSuyog Pawar i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
512*c83a76b0SSuyog Pawar else if(i4_mod_cur_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
513*c83a76b0SSuyog Pawar i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
514*c83a76b0SSuyog Pawar
515*c83a76b0SSuyog Pawar /*Modify the qp based on delta*/
516*c83a76b0SSuyog Pawar ps_ctxt->i4_frame_mod_qp = i4_mod_cur_qp;
517*c83a76b0SSuyog Pawar ps_ctxt->i4_is_first_query = 0;
518*c83a76b0SSuyog Pawar if(ps_ctxt->i4_frame_mod_qp != ps_ctxt->i4_frame_qp)
519*c83a76b0SSuyog Pawar {
520*c83a76b0SSuyog Pawar ps_ctxt->i4_is_ctb_qp_modified = 1;
521*c83a76b0SSuyog Pawar }
522*c83a76b0SSuyog Pawar }
523*c83a76b0SSuyog Pawar
524*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->ai4_curr_qp_acc[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num] +=
525*c83a76b0SSuyog Pawar ps_ctxt->i4_frame_mod_qp;
526*c83a76b0SSuyog Pawar
527*c83a76b0SSuyog Pawar osal_mutex_unlock(ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
528*c83a76b0SSuyog Pawar
529*c83a76b0SSuyog Pawar return;
530*c83a76b0SSuyog Pawar }
531