xref: /aosp_15_r20/external/libxaac/encoder/iusace_config.h (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451)
1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2023 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 #pragma once
/* Number of entries in ia_usac_config_struct::output_channel_pos[]. */
22 #define BS_MAX_NUM_OUT_CHANNELS (255)
/* NOTE(review): presumably the lowest accepted bitrate in bits per second;
 * it is not referenced anywhere in this header - confirm against the users. */
23 #define MINIMUM_BITRATE 8000
24 
/*
 * Table of working-buffer pointers used by the encoder.
 *
 * Almost every member is a pointer (or an array of MAX_TIME_CHANNELS
 * pointers); the struct does not record the size of any buffer it points to.
 * mixed_rad_fft[] is the only member that stores FLOAT32 values directly
 * inside the struct.
 *
 * NOTE(review): the member names suggest the pointers are carved out of shared
 * scratch regions (ptr_scratch_buf, ptr_fd_scratch, ptr_lpd_scratch, ...) by
 * code outside this header, so several of them may alias - confirm against the
 * allocation code before assuming two buffers are independent.
 *
 * FLOAT32, FLOAT64, WORD16, WORD32, UWORD8, pUWORD8, pVOID, VOID,
 * MAX_TIME_CHANNELS and LEN_SUPERFRAME must be declared before this header is
 * included; this header includes nothing itself.
 */
25 typedef struct {
26   FLOAT64 *p_fd_mdct_windowed_long_buf;
27   FLOAT64 *p_fd_mdct_windowed_short_buf;
28   FLOAT32 *p_fft_mdct_buf;
29   FLOAT64 *p_sort_grouping_scratch;
30   WORD32 *p_degroup_scratch;
31   WORD32 *p_arith_map_prev_scratch;
32   WORD32 *p_arith_map_pres_scratch;
33   FLOAT64 *p_noise_filling_highest_tone;
34   FLOAT32 *p_lpd_frm_enc_scratch;
35   FLOAT64 *p_quant_spectrum_spec_scratch;
36   UWORD8 *ptr_scratch_buf;
37   FLOAT32 *p_synth_tcx_buf;
38   FLOAT32 *p_synth_buf;
39   FLOAT32 *p_wsig_buf;
40   FLOAT32 *p_wsyn_buf;
41   FLOAT32 *p_wsyn_tcx_buf;
42   FLOAT32 *p_temp_wsyn_buf;
43   FLOAT32 *p_buf_aut_corr;
44   FLOAT32 *p_buf_synthesis_tool;
45   FLOAT32 *p_buf_speech;
46   FLOAT32 *p_buf_res;
47   FLOAT32 *p_buf_signal;
48   FLOAT32 *p_lp_filter_coeff;
49   FLOAT32 *p_lp_filter_coeff_q;
50   WORD32 *p_prm_tcx;
51   FLOAT32 *p_wsp_prev_buf;
52   FLOAT32 *p_xn2;
53   FLOAT32 *p_fac_dec;
54   FLOAT32 *p_right_fac_spec;
55   FLOAT32 *p_x2;
56   WORD32 *p_param;
57   FLOAT32 *p_x;
58   FLOAT32 *p_xn_2;
59   FLOAT32 *p_fac_window;
60   FLOAT32 *p_temp_mdct;
61   WORD16 *p_fac_bits_word;
62   FLOAT64 *p_left_fac_time_data;
63   FLOAT32 *p_left_fac_timedata_flt;
64   FLOAT32 *p_left_fac_spec;
65   FLOAT64 *p_fac_win;
66   WORD32 *p_fac_prm;
67   FLOAT32 *p_acelp_folded_scratch;
68   FLOAT32 *p_xn1_tcx;
69   FLOAT32 *p_xn_buf_tcx;
70   FLOAT32 *p_x_tcx;
71   FLOAT32 *p_x_tmp_tcx;
72   FLOAT32 *p_en_tcx;
73   FLOAT32 *p_alfd_gains_tcx;
74   FLOAT32 *p_sq_enc_tcx;
75   WORD32 *p_sq_quant_tcx;
76   FLOAT32 *p_gain1_tcx;
77   FLOAT32 *p_gain2_tcx;
78   FLOAT32 *p_facelp_tcx;
79   FLOAT32 *p_xn2_tcx;
80   FLOAT32 *p_fac_window_tcx;
81   FLOAT32 *p_x1_tcx;
82   FLOAT32 *p_x2_tcx;
83   WORD32 *p_y_tcx;
84   FLOAT32 *p_in_out_tcx;
85   FLOAT32 *p_time_signal;
86   FLOAT32 *p_complex_fft;
87   WORD32 *p_tonal_flag;
88   FLOAT32 *p_pow_spec;
89   FLOAT64 *p_tns_scratch;
90   FLOAT64 *p_tns_filter;
91   FLOAT32 *p_exp_spec;
92   FLOAT32 *p_mdct_spec_float;
93   FLOAT32 *p_fir_sig_buf;
94   FLOAT32 *p_sq_gain_en;
95   FLOAT32 *p_acelp_ir_buf;
96   FLOAT32 *p_acelp_exc_buf;
97   FLOAT32 *p_adjthr_ptr_exp_spec;
98   FLOAT32 *p_adjthr_mdct_spec_float;
99   WORD16 *p_adjthr_quant_spec_temp;
100   FLOAT64 *p_cmpx_mdct_temp_buf;
101   FLOAT32 *p_fft_p2_y;
102   FLOAT32 *p_fft_p3_data_3;
103   FLOAT32 *p_fft_p3_y;
104   FLOAT32 *p_tcx_input;
105   FLOAT32 *p_tcx_output;
  /* Array of MAX_TIME_CHANNELS pointers (one FLOAT64 buffer per entry). */
106   FLOAT64 *p_reconstructed_time_signal[MAX_TIME_CHANNELS];
107   FLOAT32 *p_ol_pitch_buf_tmp;
108   FLOAT32 *p_ol_pitch_speech_buf;
109   FLOAT32 *p_ol_pitch_w_table;
110   FLOAT32 *p_ol_pitch_R;
111   FLOAT32 *p_ol_pitch_R0;
112   WORD32 *ptr_num_fac_bits;
113   WORD32 *ptr_tns_data_present;
114   FLOAT32 *ptr_tmp_lp_res;
115 
  /* The next three members are arrays of MAX_TIME_CHANNELS pointers. */
116   FLOAT32 *ptr_sfb_form_fac[MAX_TIME_CHANNELS];
117   FLOAT32 *ptr_sfb_num_relevant_lines[MAX_TIME_CHANNELS];
118   FLOAT32 *ptr_sfb_ld_energy[MAX_TIME_CHANNELS];
119   WORD32 *ptr_num_scfs;
120   WORD32 *ptr_max_ch_dyn_bits;
121   FLOAT32 *ptr_ch_bit_dist;
122   pUWORD8 ptr_fd_scratch;
123   pUWORD8 ptr_lpd_scratch;
124   FLOAT32 *ptr_tcx_scratch;
  /* Distinct member from p_tns_scratch above (same FLOAT64 * type). */
125   FLOAT64 *ptr_tns_scratch;
126   WORD32 *ptr_next_win_scratch;
127   FLOAT32 *ptr_acelp_scratch;
  /* Stored inline: 2 * LEN_SUPERFRAME FLOAT32 values inside the struct. */
128   FLOAT32 mixed_rad_fft[2 * LEN_SUPERFRAME];
129   pVOID drc_scratch;
130   VOID *ptr_drc_scratch_buf;
131   VOID *ptr_stack_mem;
132 } iusace_scratch_mem;
133 
/* Capacity of the per-element arrays in this header: usac_element_type[] and
 * str_usac_element_config[] in ia_usac_config_struct, and bw_limit[] in
 * ia_usac_encoder_config_struct. */
134 #define USAC_MAX_ELEMENTS (32)
/* Capacity of the usac_config_ext_* arrays in ia_usac_config_struct. */
135 #define USAC_MAX_CONFIG_EXTENSIONS (16)
136 
/* Element type ids. The values equal USAC_ELEMENT_TYPE_SCE / _CPE / _EXT of
 * ia_usac_ele_type below; value 2 is not defined in this header. */
137 #define ID_USAC_SCE 0
138 #define ID_USAC_CPE 1
139 #define ID_USAC_EXT 3
140 
/* NOTE(review): these match the MPEG-4 audio object type numbers for SBR (5)
 * and USAC (42) - confirm against ISO/IEC 14496-3 before relying on it. */
141 #define AOT_SBR (5)
142 #define AOT_USAC (42)
143 
/* Extension element type ids (presumably the values stored in
 * ia_usac_enc_element_config_struct::usac_ext_ele_type - verify at the
 * call sites). Note the list is not in numeric order. */
144 #define ID_EXT_ELE_FILL 0
145 #define ID_EXT_ELE_UNI_DRC 4
146 #define ID_EXT_ELE_AUDIOPREROLL (3)
147 
/* Config extension type ids (presumably the values stored in
 * ia_usac_config_struct::usac_config_ext_type[] - verify at the call sites). */
148 #define ID_CONFIG_EXT_FILL 0
149 #define ID_CONFIG_EXT_DOWNMIX (1)
150 #define ID_CONFIG_EXT_LOUDNESS_INFO (2)
151 #define ID_CONFIG_EXT_STREAM_ID (7)
/* NOTE(review): presumably the stream-id extension payload length in bytes
 * (the stream id fields in this header are UWORD16) - confirm. */
152 #define CONFIG_EXT_LEN_STREAM_ID (2)
/* Used in this header to size c_prev[] / c_pres[] in
 * ia_usac_td_encoder_struct as (NUM_COEFF / 2) + 4 entries. */
153 #define NUM_COEFF (1024)
154 
/*
 * USAC element type.
 *
 * SCE, CPE and EXT carry the same numeric values as the ID_USAC_SCE,
 * ID_USAC_CPE and ID_USAC_EXT macros (0, 1 and 3). Value 2 has no enumerator
 * here, and -1 marks an invalid type.
 */
155 typedef enum {
156 
157   USAC_ELEMENT_TYPE_INVALID = -1,
158   USAC_ELEMENT_TYPE_SCE = 0,
159   USAC_ELEMENT_TYPE_CPE = 1,
160   USAC_ELEMENT_TYPE_EXT = 3
161 
162 } ia_usac_ele_type;
163 
/*
 * SBR configuration of one USAC element; embedded as str_usac_sbr_config in
 * ia_usac_enc_element_config_struct. All members are UWORD32.
 *
 * NOTE(review): the dflt_* names look like the default SBR header fields of
 * the USAC SBR configuration; value ranges and bit widths are not visible in
 * this header - confirm against the bitstream writer.
 */
164 typedef struct {
165   UWORD32 harmonic_sbr;
166   UWORD32 bs_inter_tes;
167   UWORD32 bs_pvc;
168   UWORD32 dflt_start_freq;
169   UWORD32 dflt_stop_freq;
170   UWORD32 dflt_header_extra1;
171   UWORD32 dflt_header_extra2;
172   UWORD32 dflt_freq_scale;
173   UWORD32 dflt_alter_scale;
174   UWORD32 dflt_noise_bands;
175   UWORD32 dflt_limiter_bands;
176   UWORD32 dflt_limiter_gains;
177   UWORD32 dflt_interpol_freq;
178   UWORD32 dflt_smoothing_mode;
179 } ia_usac_enc_sbr_config_struct;
180 
/*
 * MPS 2-1-2 configuration of one USAC element; embedded as
 * str_usac_mps212_config in ia_usac_enc_element_config_struct.
 * All members are signed WORD32.
 *
 * NOTE(review): the bs_* names look like bitstream configuration fields;
 * their valid ranges are not visible in this header - confirm against the
 * code that fills and writes them.
 */
181 typedef struct {
182   WORD32 bs_tree_config;
183   WORD32 bs_freq_res;
184   WORD32 bs_fixed_gain_dmx;
185   WORD32 bs_temp_shape_config;
186   WORD32 bs_decorr_config;
187   WORD32 bs_residual_coding;
188   WORD32 bs_residual_bands;
189   WORD32 bs_low_rate_mode;
190   WORD32 bs_phase_coding;
191   WORD32 bs_quant_coarse_xxx;
192   WORD32 bs_ott_bands_phase;
193   WORD32 bs_ott_bands_phase_present;
194   WORD32 bs_pseudo_lr;
195   WORD32 bs_env_quant_mode;
196   WORD32 bs_high_rate_mode;
197 } ia_usac_enc_mps_config_struct;
198 
/*
 * Configuration of a single USAC element. ia_usac_config_struct holds an
 * array of USAC_MAX_ELEMENTS of these (str_usac_element_config[]).
 *
 * Holds the extension-element fields (usac_ext_ele_*), the core-coder
 * switches (stereo_config_index, tw_mdct, noise_filling) and the embedded SBR
 * and MPS 2-1-2 sub-configurations. Which members are meaningful for a given
 * element type is decided by code outside this header.
 */
199 typedef struct {
200   UWORD32 usac_ext_ele_type;
201   UWORD32 usac_ext_ele_cfg_len;
202   UWORD32 usac_ext_ele_dflt_len_present;
203   UWORD32 usac_ext_ele_dflt_len;
204   UWORD32 usac_ext_ele_payload_present;
205   UWORD32 stereo_config_index;
206   UWORD32 tw_mdct;
207   UWORD32 noise_filling;
  /* 6144 / 8 = 768 bytes stored inline.
   * NOTE(review): presumably usac_ext_ele_cfg_len counts the valid bytes
   * here - confirm the unit against the writer. */
208   UWORD8 usac_ext_ele_cfg_payload[6144 / 8];
209   ia_usac_enc_sbr_config_struct str_usac_sbr_config;
210   ia_usac_enc_mps_config_struct str_usac_mps212_config;
  /* Pointer only; the pointed-to buffer and its size are not defined in this
   * header. */
211   UWORD8 *drc_config_data;
212 } ia_usac_enc_element_config_struct;
213 
/*
 * Stream-level USAC configuration: the element list, the config extensions,
 * the output channel layout and the per-element configuration.
 *
 * The per-element arrays have USAC_MAX_ELEMENTS entries and the per-extension
 * arrays have USAC_MAX_CONFIG_EXTENSIONS entries.
 * NOTE(review): presumably num_elements / num_config_extensions give the
 * number of valid entries - confirm against the code that fills them.
 */
214 typedef struct {
215   UWORD32 num_elements;
216   UWORD32 num_ext_elements;
  /* Declared UWORD32, not ia_usac_ele_type, so USAC_ELEMENT_TYPE_INVALID (-1)
   * would not be stored as a negative value here. */
217   UWORD32 usac_element_type[USAC_MAX_ELEMENTS];
218   UWORD32 usac_cfg_ext_present;
219   UWORD32 num_config_extensions;
220   UWORD32 usac_config_ext_type[USAC_MAX_CONFIG_EXTENSIONS];
221   UWORD32 usac_config_ext_len[USAC_MAX_CONFIG_EXTENSIONS];
  /* Array of pointers; this header does not show where they point. The inline
   * storage below provides 6144 / 8 = 768 bytes per extension. */
222   UWORD8 *usac_config_ext_buf[USAC_MAX_CONFIG_EXTENSIONS];
223   UWORD8 usac_cfg_ext_info_buf[USAC_MAX_CONFIG_EXTENSIONS][6144 / 8];
224   WORD32 num_out_channels;
225   WORD32 num_signal_grp;
226   WORD32 output_channel_pos[BS_MAX_NUM_OUT_CHANNELS];
227   WORD32 ccfl;
228   ia_usac_enc_element_config_struct str_usac_element_config[USAC_MAX_ELEMENTS];
229   UWORD16 stream_identifier;
230 } ia_usac_config_struct;
231 
/*
 * Encoder-side configuration and per-stream control state.
 *
 * Groups the core-coder settings (bitrate, sample rates, frame length,
 * window history), the eSBR, MPS and DRC switches, and the pre-roll /
 * random-access bookkeeping fields.
 *
 * ia_sfb_params_struct, ia_drc_input_config, ia_drc_internal_config, FLAG and
 * MAX_TIME_CHANNELS are declared elsewhere and must be visible before this
 * header is included.
 *
 * NOTE(review): units (for example Hz for the sample rates, bits per second
 * for the bitrates) are not stated in this header - confirm before
 * documenting them.
 */
232 typedef struct {
233   WORD32 aac_allow_scalefacs;
234   WORD32 aac_scale_facs;
235   WORD32 bit_rate;
236   WORD32 basic_bitrate;
  /* One entry per element slot (USAC_MAX_ELEMENTS entries). */
237   WORD32 bw_limit[USAC_MAX_ELEMENTS];
238   WORD32 ccfl;
239   WORD32 ccfl_idx;
240   WORD32 channels;
241   WORD32 codec_mode;
242   WORD32 flag_noiseFilling;
243   WORD32 iframes_interval;
244   UWORD32 num_elements;
245   UWORD32 num_ext_elements;
246 
247   WORD32 sample_rate;
248   WORD32 native_sample_rate;
249   WORD32 core_sample_rate;
250 
251   WORD32 tns_select;
252   WORD32 ui_pcm_wd_sz;
253   WORD32 use_fill_element;
  /* Window shape / sequence arrays, MAX_TIME_CHANNELS entries each. */
254   WORD32 window_shape_prev[MAX_TIME_CHANNELS];
255   WORD32 window_shape_prev_copy[MAX_TIME_CHANNELS];
256   WORD32 window_sequence[MAX_TIME_CHANNELS];
257   WORD32 window_sequence_prev[MAX_TIME_CHANNELS];
258   WORD32 window_sequence_prev_copy[MAX_TIME_CHANNELS];
259   WORD32 cmplx_pred_flag;
260   WORD32 wshape_flag;
261   WORD32 delay_total;
262   WORD32 in_frame_length;
263   // eSBR Parameters
264   WORD32 sbr_enable;
265   WORD32 sbr_ratio_idx;
266   WORD32 up_sample_ratio;
267   WORD32 sbr_pvc_active;
268   WORD32 sbr_harmonic;
269   WORD32 hq_esbr;
270   WORD32 sbr_inter_tes_active;
271   // MPS Parameters
272   WORD32 usac212enable;
273   ia_sfb_params_struct str_sfb_prms;
274   // DRC Params
275   FLAG use_drc_element;
276   WORD32 drc_frame_size;
277   ia_drc_input_config str_drc_cfg;
278   WORD32 use_acelp_only;
  // Random-access / pre-roll bookkeeping (grouping inferred from the names)
279   WORD32 random_access_interval;
280   WORD32 preroll_flag;
281   WORD32 num_preroll_frames;
282   WORD32 preroll_idx;
283   WORD32 is_ipf;
284   WORD32 preroll_frame;
285   WORD32 is_first_frame;
286   ia_drc_internal_config str_internal_drc_cfg;
287   WORD32 use_measured_loudness;
  /* Same type (UWORD16) as ia_usac_config_struct::stream_identifier. */
288   UWORD16 stream_id;
289 } ia_usac_encoder_config_struct;
290 
/*
 * LPD coder state; embedded by value as lpd_state in
 * ia_usac_td_encoder_struct.
 *
 * Array sizes depend on ORDER, LEN_FRAME and FAC_LENGTH, which are defined
 * outside this header.
 *
 * NOTE(review): the literal 128 recurs in several array sizes here and is
 * presumably a common sub-block length - confirm before replacing it with a
 * named constant.
 */
291 typedef struct {
292   WORD32 mode;
293   WORD32 num_bits;
  /* Both LPC arrays hold 2 * (ORDER + 1) values. */
294   FLOAT32 lpc_coeffs_quant[2 * (ORDER + 1)];
295   FLOAT32 lpc_coeffs[2 * (ORDER + 1)];
296   FLOAT32 synth[ORDER + 128];
297   FLOAT32 wsynth[1 + 128];
298   FLOAT32 acelp_exc[2 * LEN_FRAME];
299   WORD32 avq_params[FAC_LENGTH];
300   FLOAT32 tcx_mem[128];
301   FLOAT32 tcx_quant[1 + (2 * 128)];
302   FLOAT32 tcx_fac;
303   FLOAT32 mem_wsyn;
304 } ia_usac_lpd_state_struct;
305 
/*
 * Persistent state of the time-domain (LPD) encoder.
 *
 * Holds the frame geometry, filter memories, the previous-frame history
 * buffers, the embedded ia_usac_lpd_state_struct and the c_prev / c_pres
 * context arrays. Every buffer is stored inline; the only pointer member is
 * lp_analysis_window, which points to const data this struct cannot modify.
 *
 * Array sizes depend on L_OLD_SPEECH_HIGH_RATE, LEN_LPC0, MAX_PITCH,
 * OPL_DECIM, LEN_INTERPOL, LEN_SUPERFRAME, LEN_FRAME and ORDER, which are
 * defined outside this header, and on NUM_COEFF, which is defined above.
 */
306 typedef struct {
307   WORD32 len_frame;
308   WORD32 len_subfrm;
309   WORD32 num_subfrm;
310   WORD16 acelp_core_mode;
311   WORD32 fscale;
312   FLOAT32 mem_lp_decim2[3];
313   WORD32 decim_frac;
314   FLOAT32 mem_sig_in[4];
315   FLOAT32 mem_preemph;
316   FLOAT32 old_speech_pe[L_OLD_SPEECH_HIGH_RATE + LEN_LPC0];
317   FLOAT32 weighted_sig[128];
318   ia_usac_lpd_state_struct lpd_state;
319   FLOAT32 prev_wsp[MAX_PITCH / OPL_DECIM];
320   FLOAT32 prev_exc[MAX_PITCH + LEN_INTERPOL];
321   FLOAT32 prev_wsyn_mem;
322   FLOAT32 prev_wsp_mem;
323   FLOAT32 prev_xnq_mem;
324   WORD32 prev_ovlp_size;
  /* Three arrays of ORDER values each. */
325   FLOAT32 isf_old[ORDER];
326   FLOAT32 isp_old[ORDER];
327   FLOAT32 isp_old_q[ORDER];
328   FLOAT32 mem_wsp;
329   FLOAT32 ada_w;
330   FLOAT32 ol_gain;
331   WORD16 ol_wght_flg;
332   WORD32 prev_ol_lags[5];
333   WORD32 prev_pitch_med;
334   FLOAT32 prev_hp_wsp[LEN_SUPERFRAME / OPL_DECIM + (MAX_PITCH / OPL_DECIM)];
335   FLOAT32 hp_ol_ltp_mem[3 * 2 + 1];
  /* Read-only table; this struct stores only the pointer. */
336   const FLOAT32 *lp_analysis_window;
337   FLOAT32 xn_buffer[128];
  /* (NUM_COEFF / 2) + 4 = 516 entries each with NUM_COEFF == 1024. */
338   WORD32 c_prev[(NUM_COEFF / 2) + 4];
339   WORD32 c_pres[(NUM_COEFF / 2) + 4];
340   WORD32 arith_reset_flag;
341   WORD16 prev_mode;
342   WORD32 num_bits_per_supfrm;
  /* fd_synth and fd_orig have identical sizes: 2 * LEN_FRAME + 1 + ORDER. */
343   FLOAT32 fd_synth[2 * LEN_FRAME + 1 + ORDER];
344   FLOAT32 fd_orig[2 * LEN_FRAME + 1 + ORDER];
345   WORD32 low_pass_line;
346   WORD32 last_was_short;
347   WORD32 next_is_short;
348   FLOAT32 gain_tcx;
349   WORD32 max_sfb_short;
350 } ia_usac_td_encoder_struct;
351