1 /****************************************************************************** 2 * * 3 * Copyright (C) 2023 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 #pragma once 22 #define BS_MAX_NUM_OUT_CHANNELS (255) 23 #define MINIMUM_BITRATE 8000 24 25 typedef struct { 26 FLOAT64 *p_fd_mdct_windowed_long_buf; 27 FLOAT64 *p_fd_mdct_windowed_short_buf; 28 FLOAT32 *p_fft_mdct_buf; 29 FLOAT64 *p_sort_grouping_scratch; 30 WORD32 *p_degroup_scratch; 31 WORD32 *p_arith_map_prev_scratch; 32 WORD32 *p_arith_map_pres_scratch; 33 FLOAT64 *p_noise_filling_highest_tone; 34 FLOAT32 *p_lpd_frm_enc_scratch; 35 FLOAT64 *p_quant_spectrum_spec_scratch; 36 UWORD8 *ptr_scratch_buf; 37 FLOAT32 *p_synth_tcx_buf; 38 FLOAT32 *p_synth_buf; 39 FLOAT32 *p_wsig_buf; 40 FLOAT32 *p_wsyn_buf; 41 FLOAT32 *p_wsyn_tcx_buf; 42 FLOAT32 *p_temp_wsyn_buf; 43 FLOAT32 *p_buf_aut_corr; 44 FLOAT32 *p_buf_synthesis_tool; 45 FLOAT32 *p_buf_speech; 46 FLOAT32 *p_buf_res; 47 FLOAT32 *p_buf_signal; 48 FLOAT32 *p_lp_filter_coeff; 49 FLOAT32 *p_lp_filter_coeff_q; 50 WORD32 *p_prm_tcx; 51 FLOAT32 *p_wsp_prev_buf; 52 FLOAT32 *p_xn2; 53 FLOAT32 *p_fac_dec; 54 FLOAT32 *p_right_fac_spec; 55 FLOAT32 *p_x2; 56 WORD32 *p_param; 57 FLOAT32 *p_x; 58 FLOAT32 *p_xn_2; 59 FLOAT32 *p_fac_window; 60 FLOAT32 *p_temp_mdct; 61 WORD16 *p_fac_bits_word; 62 FLOAT64 *p_left_fac_time_data; 63 FLOAT32 *p_left_fac_timedata_flt; 64 FLOAT32 *p_left_fac_spec; 65 FLOAT64 *p_fac_win; 66 WORD32 *p_fac_prm; 67 FLOAT32 *p_acelp_folded_scratch; 68 FLOAT32 *p_xn1_tcx; 69 FLOAT32 *p_xn_buf_tcx; 70 FLOAT32 *p_x_tcx; 71 FLOAT32 *p_x_tmp_tcx; 72 FLOAT32 *p_en_tcx; 73 FLOAT32 *p_alfd_gains_tcx; 74 FLOAT32 *p_sq_enc_tcx; 75 WORD32 *p_sq_quant_tcx; 76 FLOAT32 *p_gain1_tcx; 77 FLOAT32 *p_gain2_tcx; 78 FLOAT32 *p_facelp_tcx; 79 FLOAT32 *p_xn2_tcx; 80 FLOAT32 *p_fac_window_tcx; 81 FLOAT32 *p_x1_tcx; 82 FLOAT32 *p_x2_tcx; 83 WORD32 *p_y_tcx; 84 FLOAT32 *p_in_out_tcx; 85 FLOAT32 *p_time_signal; 86 FLOAT32 *p_complex_fft; 87 WORD32 *p_tonal_flag; 88 FLOAT32 *p_pow_spec; 89 FLOAT64 *p_tns_scratch; 90 FLOAT64 *p_tns_filter; 91 FLOAT32 *p_exp_spec; 92 FLOAT32 *p_mdct_spec_float; 93 FLOAT32 *p_fir_sig_buf; 94 FLOAT32 *p_sq_gain_en; 95 FLOAT32 *p_acelp_ir_buf; 96 FLOAT32 *p_acelp_exc_buf; 97 FLOAT32 *p_adjthr_ptr_exp_spec; 98 FLOAT32 *p_adjthr_mdct_spec_float; 99 WORD16 *p_adjthr_quant_spec_temp; 100 FLOAT64 *p_cmpx_mdct_temp_buf; 101 FLOAT32 *p_fft_p2_y; 102 FLOAT32 *p_fft_p3_data_3; 103 FLOAT32 *p_fft_p3_y; 104 FLOAT32 *p_tcx_input; 105 FLOAT32 *p_tcx_output; 106 FLOAT64 *p_reconstructed_time_signal[MAX_TIME_CHANNELS]; 107 FLOAT32 *p_ol_pitch_buf_tmp; 108 FLOAT32 *p_ol_pitch_speech_buf; 109 FLOAT32 *p_ol_pitch_w_table; 110 FLOAT32 *p_ol_pitch_R; 111 FLOAT32 *p_ol_pitch_R0; 112 WORD32 *ptr_num_fac_bits; 113 WORD32 *ptr_tns_data_present; 114 FLOAT32 *ptr_tmp_lp_res; 115 116 FLOAT32 *ptr_sfb_form_fac[MAX_TIME_CHANNELS]; 117 FLOAT32 *ptr_sfb_num_relevant_lines[MAX_TIME_CHANNELS]; 118 FLOAT32 *ptr_sfb_ld_energy[MAX_TIME_CHANNELS]; 119 WORD32 *ptr_num_scfs; 120 WORD32 *ptr_max_ch_dyn_bits; 121 FLOAT32 *ptr_ch_bit_dist; 122 pUWORD8 ptr_fd_scratch; 123 pUWORD8 ptr_lpd_scratch; 124 FLOAT32 *ptr_tcx_scratch; 125 FLOAT64 *ptr_tns_scratch; 126 WORD32 *ptr_next_win_scratch; 127 FLOAT32 *ptr_acelp_scratch; 128 FLOAT32 mixed_rad_fft[2 * LEN_SUPERFRAME]; 129 pVOID drc_scratch; 130 VOID *ptr_drc_scratch_buf; 131 VOID *ptr_stack_mem; 132 } iusace_scratch_mem; 133 134 #define USAC_MAX_ELEMENTS (32) 135 #define USAC_MAX_CONFIG_EXTENSIONS (16) 136 137 #define ID_USAC_SCE 0 138 #define ID_USAC_CPE 1 139 #define ID_USAC_EXT 3 140 141 #define AOT_SBR (5) 142 #define AOT_USAC (42) 143 144 #define ID_EXT_ELE_FILL 0 145 #define ID_EXT_ELE_UNI_DRC 4 146 #define ID_EXT_ELE_AUDIOPREROLL (3) 147 148 #define ID_CONFIG_EXT_FILL 0 149 #define ID_CONFIG_EXT_DOWNMIX (1) 150 #define ID_CONFIG_EXT_LOUDNESS_INFO (2) 151 #define ID_CONFIG_EXT_STREAM_ID (7) 152 #define CONFIG_EXT_LEN_STREAM_ID (2) 153 #define NUM_COEFF (1024) 154 155 typedef enum { 156 157 USAC_ELEMENT_TYPE_INVALID = -1, 158 USAC_ELEMENT_TYPE_SCE = 0, 159 USAC_ELEMENT_TYPE_CPE = 1, 160 USAC_ELEMENT_TYPE_EXT = 3 161 162 } ia_usac_ele_type; 163 164 typedef struct { 165 UWORD32 harmonic_sbr; 166 UWORD32 bs_inter_tes; 167 UWORD32 bs_pvc; 168 UWORD32 dflt_start_freq; 169 UWORD32 dflt_stop_freq; 170 UWORD32 dflt_header_extra1; 171 UWORD32 dflt_header_extra2; 172 UWORD32 dflt_freq_scale; 173 UWORD32 dflt_alter_scale; 174 UWORD32 dflt_noise_bands; 175 UWORD32 dflt_limiter_bands; 176 UWORD32 dflt_limiter_gains; 177 UWORD32 dflt_interpol_freq; 178 UWORD32 dflt_smoothing_mode; 179 } ia_usac_enc_sbr_config_struct; 180 181 typedef struct { 182 WORD32 bs_tree_config; 183 WORD32 bs_freq_res; 184 WORD32 bs_fixed_gain_dmx; 185 WORD32 bs_temp_shape_config; 186 WORD32 bs_decorr_config; 187 WORD32 bs_residual_coding; 188 WORD32 bs_residual_bands; 189 WORD32 bs_low_rate_mode; 190 WORD32 bs_phase_coding; 191 WORD32 bs_quant_coarse_xxx; 192 WORD32 bs_ott_bands_phase; 193 WORD32 bs_ott_bands_phase_present; 194 WORD32 bs_pseudo_lr; 195 WORD32 bs_env_quant_mode; 196 WORD32 bs_high_rate_mode; 197 } ia_usac_enc_mps_config_struct; 198 199 typedef struct { 200 UWORD32 usac_ext_ele_type; 201 UWORD32 usac_ext_ele_cfg_len; 202 UWORD32 usac_ext_ele_dflt_len_present; 203 UWORD32 usac_ext_ele_dflt_len; 204 UWORD32 usac_ext_ele_payload_present; 205 UWORD32 stereo_config_index; 206 UWORD32 tw_mdct; 207 UWORD32 noise_filling; 208 UWORD8 usac_ext_ele_cfg_payload[6144 / 8]; 209 ia_usac_enc_sbr_config_struct str_usac_sbr_config; 210 ia_usac_enc_mps_config_struct str_usac_mps212_config; 211 UWORD8 *drc_config_data; 212 } ia_usac_enc_element_config_struct; 213 214 typedef struct { 215 UWORD32 num_elements; 216 UWORD32 num_ext_elements; 217 UWORD32 usac_element_type[USAC_MAX_ELEMENTS]; 218 UWORD32 usac_cfg_ext_present; 219 UWORD32 num_config_extensions; 220 UWORD32 usac_config_ext_type[USAC_MAX_CONFIG_EXTENSIONS]; 221 UWORD32 usac_config_ext_len[USAC_MAX_CONFIG_EXTENSIONS]; 222 UWORD8 *usac_config_ext_buf[USAC_MAX_CONFIG_EXTENSIONS]; 223 UWORD8 usac_cfg_ext_info_buf[USAC_MAX_CONFIG_EXTENSIONS][6144 / 8]; 224 WORD32 num_out_channels; 225 WORD32 num_signal_grp; 226 WORD32 output_channel_pos[BS_MAX_NUM_OUT_CHANNELS]; 227 WORD32 ccfl; 228 ia_usac_enc_element_config_struct str_usac_element_config[USAC_MAX_ELEMENTS]; 229 UWORD16 stream_identifier; 230 } ia_usac_config_struct; 231 232 typedef struct { 233 WORD32 aac_allow_scalefacs; 234 WORD32 aac_scale_facs; 235 WORD32 bit_rate; 236 WORD32 basic_bitrate; 237 WORD32 bw_limit[USAC_MAX_ELEMENTS]; 238 WORD32 ccfl; 239 WORD32 ccfl_idx; 240 WORD32 channels; 241 WORD32 codec_mode; 242 WORD32 flag_noiseFilling; 243 WORD32 iframes_interval; 244 UWORD32 num_elements; 245 UWORD32 num_ext_elements; 246 247 WORD32 sample_rate; 248 WORD32 native_sample_rate; 249 WORD32 core_sample_rate; 250 251 WORD32 tns_select; 252 WORD32 ui_pcm_wd_sz; 253 WORD32 use_fill_element; 254 WORD32 window_shape_prev[MAX_TIME_CHANNELS]; 255 WORD32 window_shape_prev_copy[MAX_TIME_CHANNELS]; 256 WORD32 window_sequence[MAX_TIME_CHANNELS]; 257 WORD32 window_sequence_prev[MAX_TIME_CHANNELS]; 258 WORD32 window_sequence_prev_copy[MAX_TIME_CHANNELS]; 259 WORD32 cmplx_pred_flag; 260 WORD32 wshape_flag; 261 WORD32 delay_total; 262 WORD32 in_frame_length; 263 // eSBR Parameters 264 WORD32 sbr_enable; 265 WORD32 sbr_ratio_idx; 266 WORD32 up_sample_ratio; 267 WORD32 sbr_pvc_active; 268 WORD32 sbr_harmonic; 269 WORD32 hq_esbr; 270 WORD32 sbr_inter_tes_active; 271 // MPS Parameters 272 WORD32 usac212enable; 273 ia_sfb_params_struct str_sfb_prms; 274 // DRC Params 275 FLAG use_drc_element; 276 WORD32 drc_frame_size; 277 ia_drc_input_config str_drc_cfg; 278 WORD32 use_acelp_only; 279 WORD32 random_access_interval; 280 WORD32 preroll_flag; 281 WORD32 num_preroll_frames; 282 WORD32 preroll_idx; 283 WORD32 is_ipf; 284 WORD32 preroll_frame; 285 WORD32 is_first_frame; 286 ia_drc_internal_config str_internal_drc_cfg; 287 WORD32 use_measured_loudness; 288 UWORD16 stream_id; 289 } ia_usac_encoder_config_struct; 290 291 typedef struct { 292 WORD32 mode; 293 WORD32 num_bits; 294 FLOAT32 lpc_coeffs_quant[2 * (ORDER + 1)]; 295 FLOAT32 lpc_coeffs[2 * (ORDER + 1)]; 296 FLOAT32 synth[ORDER + 128]; 297 FLOAT32 wsynth[1 + 128]; 298 FLOAT32 acelp_exc[2 * LEN_FRAME]; 299 WORD32 avq_params[FAC_LENGTH]; 300 FLOAT32 tcx_mem[128]; 301 FLOAT32 tcx_quant[1 + (2 * 128)]; 302 FLOAT32 tcx_fac; 303 FLOAT32 mem_wsyn; 304 } ia_usac_lpd_state_struct; 305 306 typedef struct { 307 WORD32 len_frame; 308 WORD32 len_subfrm; 309 WORD32 num_subfrm; 310 WORD16 acelp_core_mode; 311 WORD32 fscale; 312 FLOAT32 mem_lp_decim2[3]; 313 WORD32 decim_frac; 314 FLOAT32 mem_sig_in[4]; 315 FLOAT32 mem_preemph; 316 FLOAT32 old_speech_pe[L_OLD_SPEECH_HIGH_RATE + LEN_LPC0]; 317 FLOAT32 weighted_sig[128]; 318 ia_usac_lpd_state_struct lpd_state; 319 FLOAT32 prev_wsp[MAX_PITCH / OPL_DECIM]; 320 FLOAT32 prev_exc[MAX_PITCH + LEN_INTERPOL]; 321 FLOAT32 prev_wsyn_mem; 322 FLOAT32 prev_wsp_mem; 323 FLOAT32 prev_xnq_mem; 324 WORD32 prev_ovlp_size; 325 FLOAT32 isf_old[ORDER]; 326 FLOAT32 isp_old[ORDER]; 327 FLOAT32 isp_old_q[ORDER]; 328 FLOAT32 mem_wsp; 329 FLOAT32 ada_w; 330 FLOAT32 ol_gain; 331 WORD16 ol_wght_flg; 332 WORD32 prev_ol_lags[5]; 333 WORD32 prev_pitch_med; 334 FLOAT32 prev_hp_wsp[LEN_SUPERFRAME / OPL_DECIM + (MAX_PITCH / OPL_DECIM)]; 335 FLOAT32 hp_ol_ltp_mem[3 * 2 + 1]; 336 const FLOAT32 *lp_analysis_window; 337 FLOAT32 xn_buffer[128]; 338 WORD32 c_prev[(NUM_COEFF / 2) + 4]; 339 WORD32 c_pres[(NUM_COEFF / 2) + 4]; 340 WORD32 arith_reset_flag; 341 WORD16 prev_mode; 342 WORD32 num_bits_per_supfrm; 343 FLOAT32 fd_synth[2 * LEN_FRAME + 1 + ORDER]; 344 FLOAT32 fd_orig[2 * LEN_FRAME + 1 + ORDER]; 345 WORD32 low_pass_line; 346 WORD32 last_was_short; 347 WORD32 next_is_short; 348 FLOAT32 gain_tcx; 349 WORD32 max_sfb_short; 350 } ia_usac_td_encoder_struct; 351