xref: /aosp_15_r20/external/libxaac/encoder/ixheaace_sbr_noise_floor_est.c (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451)
1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2023 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 #include <string.h>
22 #include <math.h>
23 
24 #include "ixheaac_type_def.h"
25 #include "ixheaac_constants.h"
26 #include "ixheaac_error_standards.h"
27 #include "ixheaace_error_codes.h"
28 #include "ixheaace_aac_constants.h"
29 #include "ixheaac_basic_ops32.h"
30 #include "ixheaac_basic_ops16.h"
31 #include "ixheaac_basic_ops40.h"
32 #include "ixheaac_basic_ops.h"
33 
34 #include "ixheaace_sbr_header.h"
35 #include "ixheaace_sbr_def.h"
36 #include "ixheaace_resampler.h"
37 #include "ixheaace_sbr_rom.h"
38 #include "ixheaace_common_rom.h"
39 #include "ixheaace_sbr_hbe.h"
40 #include "ixheaace_sbr_qmf_enc.h"
41 #include "ixheaace_sbr_tran_det.h"
42 #include "ixheaace_sbr_frame_info_gen.h"
43 #include "ixheaace_sbr_env_est.h"
44 #include "ixheaace_sbr_code_envelope.h"
45 #include "ixheaace_sbr_main.h"
46 #include "ixheaace_sbr_missing_harmonics_det.h"
47 #include "ixheaace_sbr_inv_filtering_estimation.h"
48 #include "ixheaace_sbr_noise_floor_est.h"
49 
50 #include "ixheaace_sbr_ton_corr.h"
51 #include "iusace_esbr_pvc.h"
52 #include "iusace_esbr_inter_tes.h"
53 #include "ixheaace_sbr.h"
54 #include "ixheaace_common_utils.h"
55 
ia_enhaacplus_enc_smoothing_noise_levels(FLOAT32 * ptr_noise_lvls,WORD32 num_env,WORD32 num_noise_bands,FLOAT32 prev_noise_lvls[IXHEAACE_NF_SMOOTHING_LENGTH][MAXIMUM_NUM_NOISE_VALUES],const FLOAT32 * ptr_smooth_filter,WORD32 transient_flag,WORD32 is_ld_sbr)56 static VOID ia_enhaacplus_enc_smoothing_noise_levels(
57     FLOAT32 *ptr_noise_lvls, WORD32 num_env, WORD32 num_noise_bands,
58     FLOAT32 prev_noise_lvls[IXHEAACE_NF_SMOOTHING_LENGTH][MAXIMUM_NUM_NOISE_VALUES],
59     const FLOAT32 *ptr_smooth_filter, WORD32 transient_flag, WORD32 is_ld_sbr) {
60   WORD32 i, band, env;
61 
62   for (env = 0; env < num_env; env++) {
63     if (is_ld_sbr) {
64       if (transient_flag) {
65         for (i = 0; i < IXHEAACE_NF_SMOOTHING_LENGTH; i++) {
66           memcpy(prev_noise_lvls[i], ptr_noise_lvls + env * num_noise_bands,
67                  num_noise_bands * sizeof(prev_noise_lvls[i][0]));
68         }
69       } else {
70         for (i = 1; i < IXHEAACE_NF_SMOOTHING_LENGTH; i++) {
71           memcpy(prev_noise_lvls[i - 1], prev_noise_lvls[i],
72                  num_noise_bands * sizeof(prev_noise_lvls[i - 1][0]));
73         }
74       }
75     } else {
76       for (i = 1; i < IXHEAACE_NF_SMOOTHING_LENGTH; i++) {
77         memcpy(prev_noise_lvls[i - 1], prev_noise_lvls[i],
78                num_noise_bands * sizeof(prev_noise_lvls[i - 1][0]));
79       }
80     }
81     memcpy(prev_noise_lvls[IXHEAACE_NF_SMOOTHING_LENGTH - 1],
82            ptr_noise_lvls + env * num_noise_bands,
83            num_noise_bands * sizeof(prev_noise_lvls[IXHEAACE_NF_SMOOTHING_LENGTH - 1][0]));
84 
85     for (band = 0; band < num_noise_bands; band++) {
86       ptr_noise_lvls[band + env * num_noise_bands] = 0;
87 
88       for (i = 0; i < IXHEAACE_NF_SMOOTHING_LENGTH; i++) {
89         ptr_noise_lvls[band + env * num_noise_bands] +=
90             ptr_smooth_filter[i] * prev_noise_lvls[i][band];
91       }
92     }
93   }
94 }
95 
ia_enhaacplus_enc_qmf_based_noise_floor_detection(FLOAT32 * ptr_noise_lvl,FLOAT32 ** ptr_quota_orig,FLOAT32 weight_fac,FLOAT32 max_lvl,FLOAT32 noise_floor_offset,WORD8 * ptr_idx_vx,WORD32 start_index,WORD32 stop_index,WORD32 start_channel,WORD32 stop_channel,WORD32 missing_harmonic_flag,ixheaace_invf_mode thr_offset,ixheaace_invf_mode inv_filtering_lvl)96 static VOID ia_enhaacplus_enc_qmf_based_noise_floor_detection(
97     FLOAT32 *ptr_noise_lvl, FLOAT32 **ptr_quota_orig, FLOAT32 weight_fac, FLOAT32 max_lvl,
98     FLOAT32 noise_floor_offset, WORD8 *ptr_idx_vx, WORD32 start_index, WORD32 stop_index,
99     WORD32 start_channel, WORD32 stop_channel, WORD32 missing_harmonic_flag,
100     ixheaace_invf_mode thr_offset, ixheaace_invf_mode inv_filtering_lvl) {
101   WORD32 ch, idx;
102   FLOAT32 ton_org, ton_sbr, mean_org = 0, mean_sbr = 0, diff;
103 
104   if (1 == missing_harmonic_flag) {
105     for (ch = start_channel; ch < stop_channel; ch++) {
106       ton_org = 0;
107       ton_sbr = 0;
108       for (idx = start_index; idx < stop_index; idx++) {
109         ton_org += ptr_quota_orig[idx][ch];
110         ton_sbr += ptr_quota_orig[idx][ptr_idx_vx[ch]];
111       }
112 
113       ton_org /= (stop_index - start_index);
114       ton_sbr /= (stop_index - start_index);
115 
116       if (ton_org > mean_org) {
117         mean_org = ton_org;
118       }
119 
120       if (ton_sbr > mean_sbr) {
121         mean_sbr = ton_sbr;
122       }
123     }
124   } else {
125     for (ch = start_channel; ch < stop_channel; ch++) {
126       ton_org = 0;
127       ton_sbr = 0;
128       for (idx = start_index; idx < stop_index; idx++) {
129         ton_org += ptr_quota_orig[idx][ch];
130         ton_sbr += ptr_quota_orig[idx][ptr_idx_vx[ch]];
131       }
132 
133       ton_org /= (stop_index - start_index);
134       ton_sbr /= (stop_index - start_index);
135 
136       mean_org += ton_org;
137       mean_sbr += ton_sbr;
138     }
139     mean_org /= (stop_channel - start_channel);
140     mean_sbr /= (stop_channel - start_channel);
141   }
142 
143   if (mean_org < SBR_TON_MEAN_P0009 && mean_sbr < SBR_TON_MEAN_P0009) {
144     mean_org = mean_sbr = SBR_TON_MEAN_101P59;
145   }
146 
147   if (mean_org < 1.0f) {
148     mean_org = 1.0f;
149   }
150 
151   if (mean_sbr < 1.0f) {
152     mean_sbr = 1.0f;
153   }
154 
155   if (1 == missing_harmonic_flag) {
156     diff = 1.0f;
157   } else {
158     if (1.0f > (weight_fac * mean_sbr / mean_org)) {
159       diff = 1.0f;
160     } else {
161       diff = weight_fac * mean_sbr / mean_org;
162     }
163   }
164 
165   if (inv_filtering_lvl == IXHEAACE_INVF_MID_LEVEL ||
166       inv_filtering_lvl == IXHEAACE_INVF_LOW_LEVEL || inv_filtering_lvl == IXHEAACE_INVF_OFF) {
167     diff = 1.0f;
168   }
169 
170   if (inv_filtering_lvl <= thr_offset) {
171     diff = 1.0f;
172   }
173 
174   *ptr_noise_lvl = diff / mean_org;
175   *ptr_noise_lvl *= noise_floor_offset;
176 
177   if (*ptr_noise_lvl > max_lvl) {
178     *ptr_noise_lvl = max_lvl;
179   }
180 }
181 
ixheaace_sbr_noise_floor_estimate_qmf(ixheaace_pstr_noise_flr_est_sbr pstr_noise_floor_est_sbr,const ixheaace_str_frame_info_sbr * ptr_frame_info,FLOAT32 * ptr_noise_lvls,FLOAT32 ** ptr_quota_orig,WORD8 * ptr_idx_vx,WORD32 missing_harmonics_flag,WORD32 start_index,WORD32 transient_flag,ixheaace_invf_mode * ptr_inv_filt_levels,WORD32 is_ld_sbr)182 VOID ixheaace_sbr_noise_floor_estimate_qmf(
183     ixheaace_pstr_noise_flr_est_sbr pstr_noise_floor_est_sbr,
184     const ixheaace_str_frame_info_sbr *ptr_frame_info, FLOAT32 *ptr_noise_lvls,
185     FLOAT32 **ptr_quota_orig, WORD8 *ptr_idx_vx, WORD32 missing_harmonics_flag,
186     WORD32 start_index, WORD32 transient_flag, ixheaace_invf_mode *ptr_inv_filt_levels,
187     WORD32 is_ld_sbr) {
188   WORD32 n_noise_envelopes, start_pos[2], stop_pos[2], env, band;
189   WORD32 num_of_noise_bands = pstr_noise_floor_est_sbr->num_of_noise_bands;
190   WORD32 *ptr_freq_band_tab = pstr_noise_floor_est_sbr->s_freq_qmf_band_tbl;
191 
192   n_noise_envelopes = ptr_frame_info->n_noise_envelopes;
193 
194   if (n_noise_envelopes == 1) {
195     start_pos[0] = start_index;
196     stop_pos[0] = start_index + 2;
197   } else {
198     start_pos[0] = start_index;
199     stop_pos[0] = start_index + 1;
200     start_pos[1] = start_index + 1;
201     stop_pos[1] = start_index + 2;
202   }
203 
204   for (env = 0; env < n_noise_envelopes; env++) {
205     for (band = 0; band < num_of_noise_bands; band++) {
206       ia_enhaacplus_enc_qmf_based_noise_floor_detection(
207           &ptr_noise_lvls[band + env * num_of_noise_bands], ptr_quota_orig,
208           pstr_noise_floor_est_sbr->weight_fac, pstr_noise_floor_est_sbr->max_level, 1.0f,
209           ptr_idx_vx, start_pos[env], stop_pos[env], ptr_freq_band_tab[band],
210           ptr_freq_band_tab[band + 1], missing_harmonics_flag,
211           pstr_noise_floor_est_sbr->thr_offset, ptr_inv_filt_levels[band]);
212     }
213   }
214 
215   ia_enhaacplus_enc_smoothing_noise_levels(
216       ptr_noise_lvls, n_noise_envelopes, pstr_noise_floor_est_sbr->num_of_noise_bands,
217       pstr_noise_floor_est_sbr->prev_noise_lvls, pstr_noise_floor_est_sbr->ptr_smooth_filter,
218       transient_flag, is_ld_sbr);
219 
220   for (env = 0; env < n_noise_envelopes; env++) {
221     for (band = 0; band < num_of_noise_bands; band++) {
222       ptr_noise_lvls[band + env * num_of_noise_bands] =
223           (FLOAT32)SBR_NOISE_FLOOR_OFFSET -
224           (FLOAT32)(SBR_INV_LOG_2 * log(ptr_noise_lvls[band + env * num_of_noise_bands]));
225     }
226   }
227 }
228 
ia_enhaacplus_enc_down_sample_lo_res(WORD32 * ptr_result,WORD32 num_result,const UWORD8 * ptr_freq_band_tab_ref,WORD32 num_ref)229 static IA_ERRORCODE ia_enhaacplus_enc_down_sample_lo_res(WORD32 *ptr_result, WORD32 num_result,
230                                                          const UWORD8 *ptr_freq_band_tab_ref,
231                                                          WORD32 num_ref) {
232   WORD32 step;
233   WORD32 i, j;
234   WORD32 org_length, result_length;
235   WORD32 v_index[MAXIMUM_FREQ_COEFFS / 2];
236 
237   org_length = num_ref;
238   result_length = num_result;
239 
240   v_index[0] = 0;
241 
242   i = 0;
243 
244   while (org_length > 0) {
245     i++;
246 
247     step = org_length / result_length;
248 
249     org_length = org_length - step;
250 
251     result_length--;
252 
253     v_index[i] = v_index[i - 1] + step;
254   }
255 
256   if (i != num_result) {
257     return IA_EXEHAACE_INIT_FATAL_SBR_NOISE_BAND_NOT_SUPPORTED;
258   }
259 
260   for (j = 0; j <= i; j++) {
261     ptr_result[j] = ptr_freq_band_tab_ref[v_index[j]];
262   }
263 
264   return IA_NO_ERROR;
265 }
266 
267 IA_ERRORCODE
ixheaace_create_sbr_noise_floor_estimate(ixheaace_pstr_noise_flr_est_sbr pstr_noise_floor_est_sbr,WORD32 ana_max_level,const UWORD8 * ptr_freq_band_tab,WORD32 num_scf,WORD32 noise_groups,UWORD32 use_speech_config,ixheaace_str_qmf_tabs * ptr_qmf_tab)268 ixheaace_create_sbr_noise_floor_estimate(ixheaace_pstr_noise_flr_est_sbr pstr_noise_floor_est_sbr,
269                                          WORD32 ana_max_level, const UWORD8 *ptr_freq_band_tab,
270                                          WORD32 num_scf, WORD32 noise_groups,
271                                          UWORD32 use_speech_config,
272                                          ixheaace_str_qmf_tabs *ptr_qmf_tab) {
273   memset(pstr_noise_floor_est_sbr, 0, sizeof(ixheaace_str_noise_flr_est_sbr));
274 
275   pstr_noise_floor_est_sbr->ptr_smooth_filter = ptr_qmf_tab->ptr_smooth_filter;
276 
277   if (use_speech_config) {
278     pstr_noise_floor_est_sbr->weight_fac = 1.0f;
279     pstr_noise_floor_est_sbr->thr_offset = IXHEAACE_INVF_LOW_LEVEL;
280   } else {
281     pstr_noise_floor_est_sbr->weight_fac = 0.25f;
282     pstr_noise_floor_est_sbr->thr_offset = IXHEAACE_INVF_MID_LEVEL;
283   }
284 
285   if (ana_max_level == -3) {
286     pstr_noise_floor_est_sbr->max_level = 0.5f;
287   } else if (ana_max_level == 3) {
288     pstr_noise_floor_est_sbr->max_level = 2.0f;
289   } else if (ana_max_level == 6) {
290     pstr_noise_floor_est_sbr->max_level = 4.0f;
291   } else {
292   }
293 
294   pstr_noise_floor_est_sbr->noise_groups = noise_groups;
295 
296   return ixheaace_reset_sbr_noise_floor_estimate(pstr_noise_floor_est_sbr, ptr_freq_band_tab,
297                                                  num_scf);
298 }
299 
300 IA_ERRORCODE
ixheaace_reset_sbr_noise_floor_estimate(ixheaace_pstr_noise_flr_est_sbr pstr_noise_floor_est_sbr,const UWORD8 * ptr_freq_band_tab,WORD32 num_scf)301 ixheaace_reset_sbr_noise_floor_estimate(ixheaace_pstr_noise_flr_est_sbr pstr_noise_floor_est_sbr,
302                                         const UWORD8 *ptr_freq_band_tab, WORD32 num_scf) {
303   WORD32 k2, kx;
304 
305   k2 = ptr_freq_band_tab[num_scf];
306   kx = ptr_freq_band_tab[0];
307 
308   if (pstr_noise_floor_est_sbr->noise_groups == 0) {
309     pstr_noise_floor_est_sbr->num_of_noise_bands = 1;
310   } else {
311     pstr_noise_floor_est_sbr->num_of_noise_bands = (WORD32)(
312         (pstr_noise_floor_est_sbr->noise_groups * log((FLOAT32)k2 / kx) * SBR_INV_LOG_2) + 0.5f);
313 
314     if (pstr_noise_floor_est_sbr->num_of_noise_bands == 0) {
315       pstr_noise_floor_est_sbr->num_of_noise_bands = 1;
316     }
317   }
318 
319   return ia_enhaacplus_enc_down_sample_lo_res(pstr_noise_floor_est_sbr->s_freq_qmf_band_tbl,
320                                               pstr_noise_floor_est_sbr->num_of_noise_bands,
321                                               ptr_freq_band_tab, num_scf);
322 }
323