/* xref: /aosp_15_r20/external/libxaac/encoder/iusace_signal_classifier.h (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451) */
/******************************************************************************
 *                                                                            *
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */

#pragma once

/* Coding-mode identifiers.
 * NOTE(review): presumably FD = frequency-domain and TD = time-domain coding,
 * i.e. the values held in coding_mode / pre_mode -- confirm against the
 * classifier implementation. */
#define FD_MODE 2
#define TD_MODE 0

/* Negative literal is parenthesised so it always expands as one operand. */
#define MIN_POW (-200)
/* NOTE(review): presumably the spectral index bounding the "low frequency"
 * region used for the low-frequency tonal count -- confirm. */
#define INDEXOFLOWFREQUENCY 160

/* Number of look-ahead entries; sizes the *_ahead arrays of
 * ia_classification_buf_struct. */
#define NFRAMEAHEAD 1
/* Window lengths for the tonal average, spectral-tilt and smoothing stages.
 * NOTE(review): unit is presumably frames -- confirm. */
#define AVE_TONAL_LENGTH 100
#define AVE_TONAL_LENGTH_SHORT 10
#define SPECTRAL_TILT_LENGTH 80
#define SPECTRAL_TILT_LENGTH_SHORT 20
#define SMOOTHING_LENGTH 100

/* Border flag values.
 * NOTE(review): presumably stored in the flag_border members -- confirm. */
#define NO_BORDER 0
#define BORDER_MUSIC_SPEECH 1
#define BORDER_MUSIC_SPEECH_DEFINITE 2
#define BORDER_SPEECH_MUSIC 3
#define BORDER_SPEECH_MUSIC_DEFINITE 4

/* Mode-decision labels.
 * NOTE(review): presumably the values stored in the mode decision buffers --
 * confirm. */
#define TBD 0
#define SPEECH_DEFINITE 1
#define SPEECH 2
#define MUSIC_DEFINITE 3
#define MUSIC 4

/* log10(1024) and log10(768) as single-precision constants; both are
 * parenthesised for consistency. */
#define LOG_1024_BASE_10 (3.01029995664f)
#define LOG_768_BASE_10 (2.88536122003f)

48 typedef struct {
49   WORD32 smoothing_result_buf[100];            /**<buffer of smoothed mode decisions */
50   WORD32 init_result_behind[100];              /**<buffer of past mode decisions */
51   WORD32 init_result_ahead[NFRAMEAHEAD];       /**<buffer of ahead mode decisions */
52   WORD32 flag_border_buf_behind[10];           /**<buffer of past border flags */
53   WORD32 flag_border_buf_ahead[NFRAMEAHEAD];   /**<buffer of ahead border flags */
54   FLOAT32 frame_energy_buf_behind[10];         /**<buffer of past energies */
55   FLOAT32 frame_energy_buf_ahead[NFRAMEAHEAD]; /**<buffer of ahead energies */
56 } ia_classification_buf_struct;
57 
58 typedef struct {
59   WORD32 coding_mode; /**< coding mode of the frame */
60   WORD32 pre_mode;    /**< coding mode of the previous frame */
61 
62   FLOAT32 input_samples[3840 * 2];
63   WORD32 n_buffer_samples;
64   WORD32 class_buf[10];
65   WORD32 n_buf_class;
66   WORD32 n_class_frames;
67 
68   WORD32 is_switch_mode;
69 
70   WORD32 framecnt;
71   WORD32 init_flag;
72   WORD32 framecnt_xm;
73 
74   ia_classification_buf_struct buffers;
75   FLOAT32 spec_tilt_buf[100]; /* buffer of spectral tilt */
76   WORD32 n_tonal[100];        /* buffer of tonal */
77   WORD32 n_tonal_low_frequency[100];
78   FLOAT32 msd_spec_tilt_buf[5];
79   FLOAT32 msd_spec_tilt_short_buf[5]; /* buffer of the MSD of spectral tilt */
80   FLOAT32 ave_n_tonal_short_buf[5];
81   FLOAT32 ave_n_tonal_buf[5]; /* buffer of the AVE of tonal */
82 } ia_classification_struct;
83 
84 typedef struct {
85   FLOAT32 *time_signal;          /**<input signals */
86   WORD32 framecnt_xm;            /**<frame counter
87                                   */
88   WORD32 *n_tonal;               /**<buffer of the numbers of tonal
89                                   */
90   WORD32 *n_tonal_low_frequency; /**<buffer of the numbers of tonal in the low frequency domain
91                                   */
92   FLOAT32 *n_tonal_low_frequency_ratio; /**<the ratio of distribution of the numbers of tonal in
93                                            the low frequency domain*/
94   FLOAT32 *ave_n_tonal;                 /**<long - term AVE of tonal
95                                          */
96   FLOAT32 *ave_n_tonal_short;           /**<short - term AVE of tonal */
97 } ia_tonal_params_struct;
98 
99 typedef struct {
100   WORD32 framecnt;                     /**< frame counter*/
101   WORD32 *framecnt_xm;                 /**< frame counter*/
102   WORD32 *flag_border;                 /**< flag of current border*/
103   FLOAT32 ave_n_tonal_short;           /**< short - term AVE of tonal*/
104   FLOAT32 ave_n_tonal;                 /**< long - term AVE of tonal*/
105   FLOAT32 *ave_n_tonal_short_buf;      /**< buffer of short - term AVE of tonal*/
106   FLOAT32 *ave_n_tonal_buf;            /**< buffer long - term AVE of tonal*/
107   FLOAT32 msd_spec_tilt;               /**< long - term MSD of spectral tilt*/
108   FLOAT32 msd_spec_tilt_short;         /**< short - term MSD of spectral tilt*/
109   FLOAT32 *msd_spec_tilt_buf;          /**< buffer of long - term MSD of spectral tilt*/
110   FLOAT32 *msd_spec_tilt_short_buf;    /**< buffer of short - term MSD of spectral tilt*/
111   FLOAT32 n_tonal_low_frequency_ratio; /**< the ratio of distribution of the numbers of tonal in
112                                           the low frequency domain*/
113   FLOAT32 frame_energy;                /**< the energy of current frame*/
114 } ia_mode_params_struct;
115 
116 typedef struct {
117   WORD32 init_mode_decision_result; /**<  initial mode decision				*/
118   WORD32 *init_result_behind;       /**<  buffer of past mode decisions		*/
119   WORD32 *init_result_ahead;        /**<  buffer of ahead mode decisions	*/
120   WORD32 flag_border;               /**<  current flag of border			*/
121   WORD32 *flag_border_buf_behind;   /**<  buffer of past flags of border	*/
122   WORD32 *flag_border_buf_ahead;    /**<  buffer of ahead flags of border	*/
123   FLOAT32 frame_energy;             /**<  the energy of current frame		*/
124   FLOAT32 *frame_energy_buf_behind; /**<  buffer of past frame energies		*/
125   FLOAT32 *frame_energy_buf_ahead;  /**<  buffer of ahead frame energies	*/
126   WORD32 *smoothing_result_buf;     /**<  buffer of smoothed mode decision	*/
127   WORD32 flag_speech_definite;
128   WORD32 count_small_energy;
129   WORD32 flag_music_definite;
130   WORD32 num_smoothing;
131 } ia_smooth_params_struct; /**<  final mode decision result        */
132 
133 typedef struct {
134   FLOAT32 *time_signal;         /**<input signals                    */
135   WORD32 framecnt_xm;           /**<frame counter					*/
136   FLOAT32 *spec_tilt_buf;       /**<buffer of spectral tilt			*/
137   FLOAT32 *msd_spec_tilt;       /**<long - term MSD of spectral tilt	*/
138   FLOAT32 *msd_spec_tilt_short; /**<short - term MSD of spectral tilt*/
139   FLOAT32 frame_energy;
140 } ia_spec_tilt_params_struct;
141 
142 typedef struct {
143   const FLOAT64 twiddle_table_fft_float[514];
144   const FLOAT64 hanning_window_1024[FRAME_LEN_LONG];
145   const FLOAT64 absolute_threshold_1024[FRAME_LEN_LONG / 2];
146   const FLOAT64 hanning_window_768[LEN_SUPERFRAME_768];
147   const FLOAT64 absolute_threshold_768[LEN_SUPERFRAME_768 / 2];
148 } ia_signal_classifier_tables;
149 
150 extern const ia_signal_classifier_tables iusace_classify_arrays;
151 
152 VOID iusace_init_classification(ia_classification_struct *pstr_sig_class);
153