xref: /aosp_15_r20/external/libopus/dnn/osce_features.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2023 Amazon
2*a58d3d2aSXin Li    Written by Jan Buethe */
3*a58d3d2aSXin Li /*
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #define OSCE_SPEC_WINDOW_SIZE 320
33*a58d3d2aSXin Li #define OSCE_SPEC_NUM_FREQS 161
34*a58d3d2aSXin Li 
35*a58d3d2aSXin Li 
36*a58d3d2aSXin Li /*DEBUG*/
37*a58d3d2aSXin Li /*#define WRITE_FEATURES*/
38*a58d3d2aSXin Li /*#define DEBUG_PRING*/
39*a58d3d2aSXin Li /*******/
40*a58d3d2aSXin Li 
41*a58d3d2aSXin Li #include "stack_alloc.h"
42*a58d3d2aSXin Li #include "osce_features.h"
43*a58d3d2aSXin Li #include "kiss_fft.h"
44*a58d3d2aSXin Li #include "os_support.h"
45*a58d3d2aSXin Li #include "osce.h"
46*a58d3d2aSXin Li #include "freq.h"
47*a58d3d2aSXin Li 
48*a58d3d2aSXin Li 
49*a58d3d2aSXin Li #if defined(WRITE_FEATURES) || defined(DEBUG_PRING)
50*a58d3d2aSXin Li #include <stdio.h>
51*a58d3d2aSXin Li #include <stdlib.h>
52*a58d3d2aSXin Li #endif
53*a58d3d2aSXin Li 
/* Center bin (index into the 161-bin one-sided spectrum) of each of the 64
   bands passed to apply_filterbank() for the LPC-derived ("clean") spectrum.
   Entries are strictly increasing and run from bin 0 to bin 160. */
static const int center_bins_clean[64] = {
    /* bands  0.. 7 */   0,   2,   5,   8,  10,  12,  15,  18,
    /* bands  8..15 */  20,  22,  25,  28,  30,  33,  35,  38,
    /* bands 16..23 */  40,  42,  45,  48,  50,  52,  55,  58,
    /* bands 24..31 */  60,  62,  65,  68,  70,  73,  75,  78,
    /* bands 32..39 */  80,  82,  85,  88,  90,  92,  95,  98,
    /* bands 40..47 */ 100, 102, 105, 108, 110, 112, 115, 118,
    /* bands 48..55 */ 120, 122, 125, 128, 130, 132, 135, 138,
    /* bands 56..63 */ 140, 142, 145, 148, 150, 152, 155, 160
};
64*a58d3d2aSXin Li 
/* Center bin (index into the 161-bin one-sided spectrum) of each of the 18
   bands passed to apply_filterbank() for the signal-derived ("noisy")
   spectrum. Entries are strictly increasing; the spacing grows towards the
   upper bins. */
static const int center_bins_noisy[18] = {
    /* bands  0.. 5 */   0,   4,   8,  12,  16,  20,
    /* bands  6..11 */  24,  28,  32,  40,  48,  56,
    /* bands 12..17 */  64,  80,  96, 112, 136, 160
};
70*a58d3d2aSXin Li 
/* Per-band scale factors for the 64-band "clean" filterbank; entry b is the
   factor apply_filterbank() multiplies into every contribution to band b. */
static const float band_weights_clean[64] = {
    /* bands  0.. 3 */ 0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands  4.. 7 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands  8..11 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 12..15 */ 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
    /* bands 16..19 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 20..23 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 24..27 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 28..31 */ 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
    /* bands 32..35 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 36..39 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 40..43 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 44..47 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 48..51 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 52..55 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 56..59 */ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    /* bands 60..63 */ 0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
};
89*a58d3d2aSXin Li 
/* Per-band scale factors for the 18-band "noisy" filterbank; entry b is the
   factor apply_filterbank() multiplies into every contribution to band b. */
static const float band_weights_noisy[18] = {
    /* bands  0.. 5 */ 0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
    /* bands  6..11 */ 0.250000000000f, 0.250000000000f, 0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
    /* bands 12..17 */ 0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f, 0.041666666667f, 0.080000000000f
};
97*a58d3d2aSXin Li 
98*a58d3d2aSXin Li static float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
99*a58d3d2aSXin Li      0.004908718808f,     0.014725683311f,     0.024541228523f,     0.034354408400f,     0.044164277127f,
100*a58d3d2aSXin Li      0.053969889210f,     0.063770299562f,     0.073564563600f,     0.083351737332f,     0.093130877450f,
101*a58d3d2aSXin Li      0.102901041421f,     0.112661287575f,     0.122410675199f,     0.132148264628f,     0.141873117332f,
102*a58d3d2aSXin Li      0.151584296010f,     0.161280864678f,     0.170961888760f,     0.180626435180f,     0.190273572448f,
103*a58d3d2aSXin Li      0.199902370753f,     0.209511902052f,     0.219101240157f,     0.228669460829f,     0.238215641862f,
104*a58d3d2aSXin Li      0.247738863176f,     0.257238206902f,     0.266712757475f,     0.276161601717f,     0.285583828929f,
105*a58d3d2aSXin Li      0.294978530977f,     0.304344802381f,     0.313681740399f,     0.322988445118f,     0.332264019538f,
106*a58d3d2aSXin Li      0.341507569661f,     0.350718204573f,     0.359895036535f,     0.369037181064f,     0.378143757022f,
107*a58d3d2aSXin Li      0.387213886697f,     0.396246695891f,     0.405241314005f,     0.414196874117f,     0.423112513073f,
108*a58d3d2aSXin Li      0.431987371563f,     0.440820594212f,     0.449611329655f,     0.458358730621f,     0.467061954019f,
109*a58d3d2aSXin Li      0.475720161014f,     0.484332517110f,     0.492898192230f,     0.501416360796f,     0.509886201809f,
110*a58d3d2aSXin Li      0.518306898929f,     0.526677640552f,     0.534997619887f,     0.543266035038f,     0.551482089078f,
111*a58d3d2aSXin Li      0.559644990127f,     0.567753951426f,     0.575808191418f,     0.583806933818f,     0.591749407690f,
112*a58d3d2aSXin Li      0.599634847523f,     0.607462493302f,     0.615231590581f,     0.622941390558f,     0.630591150148f,
113*a58d3d2aSXin Li      0.638180132051f,     0.645707604824f,     0.653172842954f,     0.660575126926f,     0.667913743292f,
114*a58d3d2aSXin Li      0.675187984742f,     0.682397150168f,     0.689540544737f,     0.696617479953f,     0.703627273726f,
115*a58d3d2aSXin Li      0.710569250438f,     0.717442741007f,     0.724247082951f,     0.730981620454f,     0.737645704427f,
116*a58d3d2aSXin Li      0.744238692572f,     0.750759949443f,     0.757208846506f,     0.763584762206f,     0.769887082016f,
117*a58d3d2aSXin Li      0.776115198508f,     0.782268511401f,     0.788346427627f,     0.794348361383f,     0.800273734191f,
118*a58d3d2aSXin Li      0.806121974951f,     0.811892519997f,     0.817584813152f,     0.823198305781f,     0.828732456844f,
119*a58d3d2aSXin Li      0.834186732948f,     0.839560608398f,     0.844853565250f,     0.850065093356f,     0.855194690420f,
120*a58d3d2aSXin Li      0.860241862039f,     0.865206121757f,     0.870086991109f,     0.874883999665f,     0.879596685080f,
121*a58d3d2aSXin Li      0.884224593137f,     0.888767277786f,     0.893224301196f,     0.897595233788f,     0.901879654283f,
122*a58d3d2aSXin Li      0.906077149740f,     0.910187315596f,     0.914209755704f,     0.918144082372f,     0.921989916403f,
123*a58d3d2aSXin Li      0.925746887127f,     0.929414632439f,     0.932992798835f,     0.936481041442f,     0.939879024058f,
124*a58d3d2aSXin Li      0.943186419177f,     0.946402908026f,     0.949528180593f,     0.952561935658f,     0.955503880820f,
125*a58d3d2aSXin Li      0.958353732530f,     0.961111216112f,     0.963776065795f,     0.966348024735f,     0.968826845041f,
126*a58d3d2aSXin Li      0.971212287799f,     0.973504123096f,     0.975702130039f,     0.977806096779f,     0.979815820533f,
127*a58d3d2aSXin Li      0.981731107599f,     0.983551773378f,     0.985277642389f,     0.986908548290f,     0.988444333892f,
128*a58d3d2aSXin Li      0.989884851171f,     0.991229961288f,     0.992479534599f,     0.993633450666f,     0.994691598273f,
129*a58d3d2aSXin Li      0.995653875433f,     0.996520189401f,     0.997290456679f,     0.997964603026f,     0.998542563469f,
130*a58d3d2aSXin Li      0.999024282300f,     0.999409713092f,     0.999698818696f,     0.999891571247f,     0.999987952167f,
131*a58d3d2aSXin Li      0.999987952167f,     0.999891571247f,     0.999698818696f,     0.999409713092f,     0.999024282300f,
132*a58d3d2aSXin Li      0.998542563469f,     0.997964603026f,     0.997290456679f,     0.996520189401f,     0.995653875433f,
133*a58d3d2aSXin Li      0.994691598273f,     0.993633450666f,     0.992479534599f,     0.991229961288f,     0.989884851171f,
134*a58d3d2aSXin Li      0.988444333892f,     0.986908548290f,     0.985277642389f,     0.983551773378f,     0.981731107599f,
135*a58d3d2aSXin Li      0.979815820533f,     0.977806096779f,     0.975702130039f,     0.973504123096f,     0.971212287799f,
136*a58d3d2aSXin Li      0.968826845041f,     0.966348024735f,     0.963776065795f,     0.961111216112f,     0.958353732530f,
137*a58d3d2aSXin Li      0.955503880820f,     0.952561935658f,     0.949528180593f,     0.946402908026f,     0.943186419177f,
138*a58d3d2aSXin Li      0.939879024058f,     0.936481041442f,     0.932992798835f,     0.929414632439f,     0.925746887127f,
139*a58d3d2aSXin Li      0.921989916403f,     0.918144082372f,     0.914209755704f,     0.910187315596f,     0.906077149740f,
140*a58d3d2aSXin Li      0.901879654283f,     0.897595233788f,     0.893224301196f,     0.888767277786f,     0.884224593137f,
141*a58d3d2aSXin Li      0.879596685080f,     0.874883999665f,     0.870086991109f,     0.865206121757f,     0.860241862039f,
142*a58d3d2aSXin Li      0.855194690420f,     0.850065093356f,     0.844853565250f,     0.839560608398f,     0.834186732948f,
143*a58d3d2aSXin Li      0.828732456844f,     0.823198305781f,     0.817584813152f,     0.811892519997f,     0.806121974951f,
144*a58d3d2aSXin Li      0.800273734191f,     0.794348361383f,     0.788346427627f,     0.782268511401f,     0.776115198508f,
145*a58d3d2aSXin Li      0.769887082016f,     0.763584762206f,     0.757208846506f,     0.750759949443f,     0.744238692572f,
146*a58d3d2aSXin Li      0.737645704427f,     0.730981620454f,     0.724247082951f,     0.717442741007f,     0.710569250438f,
147*a58d3d2aSXin Li      0.703627273726f,     0.696617479953f,     0.689540544737f,     0.682397150168f,     0.675187984742f,
148*a58d3d2aSXin Li      0.667913743292f,     0.660575126926f,     0.653172842954f,     0.645707604824f,     0.638180132051f,
149*a58d3d2aSXin Li      0.630591150148f,     0.622941390558f,     0.615231590581f,     0.607462493302f,     0.599634847523f,
150*a58d3d2aSXin Li      0.591749407690f,     0.583806933818f,     0.575808191418f,     0.567753951426f,     0.559644990127f,
151*a58d3d2aSXin Li      0.551482089078f,     0.543266035038f,     0.534997619887f,     0.526677640552f,     0.518306898929f,
152*a58d3d2aSXin Li      0.509886201809f,     0.501416360796f,     0.492898192230f,     0.484332517110f,     0.475720161014f,
153*a58d3d2aSXin Li      0.467061954019f,     0.458358730621f,     0.449611329655f,     0.440820594212f,     0.431987371563f,
154*a58d3d2aSXin Li      0.423112513073f,     0.414196874117f,     0.405241314005f,     0.396246695891f,     0.387213886697f,
155*a58d3d2aSXin Li      0.378143757022f,     0.369037181064f,     0.359895036535f,     0.350718204573f,     0.341507569661f,
156*a58d3d2aSXin Li      0.332264019538f,     0.322988445118f,     0.313681740399f,     0.304344802381f,     0.294978530977f,
157*a58d3d2aSXin Li      0.285583828929f,     0.276161601717f,     0.266712757475f,     0.257238206902f,     0.247738863176f,
158*a58d3d2aSXin Li      0.238215641862f,     0.228669460829f,     0.219101240157f,     0.209511902052f,     0.199902370753f,
159*a58d3d2aSXin Li      0.190273572448f,     0.180626435180f,     0.170961888760f,     0.161280864678f,     0.151584296010f,
160*a58d3d2aSXin Li      0.141873117332f,     0.132148264628f,     0.122410675199f,     0.112661287575f,     0.102901041421f,
161*a58d3d2aSXin Li      0.093130877450f,     0.083351737332f,     0.073564563600f,     0.063770299562f,     0.053969889210f,
162*a58d3d2aSXin Li      0.044164277127f,     0.034354408400f,     0.024541228523f,     0.014725683311f,     0.004908718808f
163*a58d3d2aSXin Li };
164*a58d3d2aSXin Li 
/* Accumulates the spectrum x_in into num_bands overlapping bands.
 *
 *   x_out        O  num_bands band values (must not be the same pointer as x_in)
 *   x_in         I  spectrum, indexed by bin up to center_bins[num_bands - 1]
 *   center_bins  I  center bin of every band
 *   band_weights I  scale factor of every band
 *   num_bands    I  number of bands; x_out[num_bands - 1] is accessed
 *                   unconditionally, so at least one band is expected
 *
 * Every bin between two neighbouring centers is split linearly between the
 * two bands: the share of the lower band is 1 at its own center and shrinks
 * towards the next center, the upper band receives the remainder.
 */
static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
{
    int band, bin;
    const int last_band = num_bands - 1;

    celt_assert(x_in != x_out);

    x_out[0] = 0;
    for (band = 0; band < last_band; band++)
    {
        const int lower_center = center_bins[band];
        const int upper_center = center_bins[band + 1];

        x_out[band + 1] = 0;
        for (bin = lower_center; bin < upper_center; bin++)
        {
            /* share of this bin that goes to the lower band */
            const float lower_share = (float) (upper_center - bin) / (upper_center - lower_center);

            x_out[band]     += band_weights[band] * lower_share * x_in[bin];
            x_out[band + 1] += band_weights[band + 1] * (1 - lower_share) * x_in[bin];
        }
    }

    /* the last center bin is not reached by the loop above; it goes entirely into the last band */
    x_out[last_band] += band_weights[last_band] * x_in[center_bins[last_band]];

#ifdef DEBUG_PRINT
    for (band = 0; band < num_bands; band++)
    {
        printf("band[%d]: %f\n", band, x_out[band]);
    }
#endif
}
192*a58d3d2aSXin Li 
193*a58d3d2aSXin Li 
mag_spec_320_onesided(float * out,float * in)194*a58d3d2aSXin Li static void mag_spec_320_onesided(float *out, float *in)
195*a58d3d2aSXin Li {
196*a58d3d2aSXin Li     celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
197*a58d3d2aSXin Li     kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
198*a58d3d2aSXin Li     int k;
199*a58d3d2aSXin Li     forward_transform(buffer, in);
200*a58d3d2aSXin Li 
201*a58d3d2aSXin Li     for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
202*a58d3d2aSXin Li     {
203*a58d3d2aSXin Li         out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
204*a58d3d2aSXin Li #ifdef DEBUG_PRINT
205*a58d3d2aSXin Li         printf("magspec[%d]: %f\n", k, out[k]);
206*a58d3d2aSXin Li #endif
207*a58d3d2aSXin Li     }
208*a58d3d2aSXin Li }
209*a58d3d2aSXin Li 
210*a58d3d2aSXin Li 
/* Log band spectrum derived from a set of LPC coefficients.
 *
 *   spec       O  OSCE_CLEAN_SPEC_NUM_BANDS values, 0.3 * log(band value + 1e-9)
 *   a_q12      I  lpc_order prediction coefficients, scaled by 2^12
 *   lpc_order  I  number of coefficients
 *
 * The sequence [1, -a[0], ..., -a[lpc_order - 1]] is zero-padded to 320
 * samples, its magnitude spectrum is inverted bin by bin and the result is
 * accumulated into bands.
 */
static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
{
    float response[OSCE_SPEC_WINDOW_SIZE] = {0};
    int tap, bin, band;

    /* coefficient sequence, zero-padded by the initializer above */
    response[0] = 1;
    for (tap = 1; tap <= lpc_order; tap++)
    {
        response[tap] = - (float)a_q12[tap - 1] / (1U << 12);
    }

    /* magnitude spectrum, computed in place */
    mag_spec_320_onesided(response, response);

    /* reciprocal of every bin; the small offset avoids a division by zero */
    for (bin = 0; bin < OSCE_SPEC_NUM_FREQS; bin++)
    {
        response[bin] = 1.f / (response[bin] + 1e-9f);
    }

    /* band accumulation */
    apply_filterbank(spec, response, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);

    /* logarithm and scaling */
    for (band = 0; band < OSCE_CLEAN_SPEC_NUM_BANDS; band++)
    {
        spec[band] = 0.3f * log(spec[band] + 1e-9f);
    }
}
240*a58d3d2aSXin Li 
calculate_cepstrum(float * cepstrum,float * signal)241*a58d3d2aSXin Li static void calculate_cepstrum(float *cepstrum, float *signal)
242*a58d3d2aSXin Li {
243*a58d3d2aSXin Li     float buffer[OSCE_SPEC_WINDOW_SIZE];
244*a58d3d2aSXin Li     float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3];
245*a58d3d2aSXin Li     int n;
246*a58d3d2aSXin Li 
247*a58d3d2aSXin Li     celt_assert(cepstrum != signal)
248*a58d3d2aSXin Li 
249*a58d3d2aSXin Li     for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++)
250*a58d3d2aSXin Li     {
251*a58d3d2aSXin Li         buffer[n] = osce_window[n] * signal[n];
252*a58d3d2aSXin Li     }
253*a58d3d2aSXin Li 
254*a58d3d2aSXin Li     /* calculate magnitude spectrum */
255*a58d3d2aSXin Li     mag_spec_320_onesided(buffer, buffer);
256*a58d3d2aSXin Li 
257*a58d3d2aSXin Li     /* accumulate bands */
258*a58d3d2aSXin Li     apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
259*a58d3d2aSXin Li 
260*a58d3d2aSXin Li     /* log domain conversion */
261*a58d3d2aSXin Li     for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++)
262*a58d3d2aSXin Li     {
263*a58d3d2aSXin Li         spec[n] = log(spec[n] + 1e-9f);
264*a58d3d2aSXin Li #ifdef DEBUG_PRINT
265*a58d3d2aSXin Li         printf("logspec[%d]: %f\n", n, spec[n]);
266*a58d3d2aSXin Li #endif
267*a58d3d2aSXin Li     }
268*a58d3d2aSXin Li 
269*a58d3d2aSXin Li     /* DCT-II (orthonormal) */
270*a58d3d2aSXin Li     celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
271*a58d3d2aSXin Li     dct(cepstrum, spec);
272*a58d3d2aSXin Li }
273*a58d3d2aSXin Li 
/* Normalized cross-correlation between an 80-sample frame and the signal
 * delayed by lag - k samples, for the five offsets k = -2 .. 2.
 *
 *   acorr   O  5 values; acorr[k + 2] belongs to offset k
 *   signal  I  start of the frame; samples signal[-lag - 2] .. signal[79] are
 *              read, so the caller has to provide enough history in front of it
 *   lag     I  pitch lag in samples
 */
static void calculate_acorr(float *acorr, float *signal, int lag)
{
    int n, k;
    float xx, xy, yy;

    celt_assert(acorr != signal);

    /* the energy of the frame itself does not depend on the offset, so it is
       computed once instead of once per offset */
    xx = 0;
    for (n = 0; n < 80; n++)
    {
        xx += signal[n] * signal[n];
    }

    for (k = -2; k <= 2; k++)
    {
        xy = 0;
        yy = 0;
        for (n = 0; n < 80; n++)
        {
            yy += signal[n - lag + k] * signal[n - lag + k];
            xy += signal[n] * signal[n - lag + k];
        }
        /* the small offset keeps the denominator non-zero for all-zero input */
        acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
    }
}
295*a58d3d2aSXin Li 
/* Selects the pitch lag to use for the current subframe and updates the
 * pitch tracking state (last_lag, last_type, pitch_hangover_count).
 *
 *   psFeatures  I/O  feature state
 *   lag         I    decoded pitch lag
 *   type        I    signal type of the frame
 *
 * Returns lag for voiced frames. For all other frames it returns
 * OSCE_NO_PITCH_VALUE, or, while a hangover is active, the last voiced lag.
 * The hangover branches are only compiled in effectively when
 * OSCE_HANGOVER_BUGFIX is defined.
 */
static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
{
    int new_lag;
    int modulus;

#ifdef OSCE_HANGOVER_BUGFIX
#define TESTBIT 1
#else
#define TESTBIT 0
#endif

    /* never take a remainder by zero */
    modulus = (OSCE_PITCH_HANGOVER == 0) ? 1 : OSCE_PITCH_HANGOVER;

    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
    if (type == TYPE_VOICED)
    {
        /* voiced frame: take the decoded lag and remember it */
        new_lag = lag;
        psFeatures->last_lag = lag;
        psFeatures->pitch_hangover_count = 0;
    }
    else if (TESTBIT && psFeatures->last_type == TYPE_VOICED)
    {
        /* first frame after a voiced one: enter hangover */
        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
        {
            new_lag = psFeatures->last_lag;
            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % modulus;
        }
        else
        {
            new_lag = OSCE_NO_PITCH_VALUE;
        }
    }
    else if (TESTBIT && psFeatures->pitch_hangover_count)
    {
        /* hangover in progress: keep the last voiced lag */
        new_lag = psFeatures->last_lag;
        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % modulus;
    }
    else
    {
        /* frame without pitch and without hangover */
        new_lag = OSCE_NO_PITCH_VALUE;
        psFeatures->pitch_hangover_count = 0;
    }

    /* remember the signal type for the next call */
    psFeatures->last_type = type;

    /* a lag of zero is not expected with the current setup */
    celt_assert(new_lag);

    return new_lag;
}
349*a58d3d2aSXin Li 
/* Computes one feature vector of OSCE_FEATURE_DIM floats per subframe
 * (80 samples each) from the decoder state and the decoded signal, and
 * updates the signal history kept in psDec->osce.features.
 *
 * Each feature vector holds: the LPC-derived log spectrum, the cepstrum of
 * the decoded signal, 5 correlation values around the pitch lag, the LTP
 * coefficients and the log of the subframe gain. Spectrum and cepstrum are
 * computed on even subframes and copied to the following odd subframe.
 */
void osce_calculate_features(
    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
    float                       *features,                      /* O    input features                              */
    float                       *numbits,                       /* O    numbits and smoothed numbits                */
    int                         *periods,                       /* O    pitch lags on subframe basis                */
    const opus_int16            xq[],                           /* I    Decoded speech                              */
    opus_int32                  num_bits                        /* I    Size of SILK payload in bits                */
)
{
    int num_subframes, num_samples;
    /* layout: [ signal history | samples of the current frame ] */
    float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
    float *frame, *pfeatures;
    OSCEFeatureState *psFeatures;
    int i, n, k;
#ifdef WRITE_FEATURES
    static FILE *f_feat = NULL;
    if (f_feat == NULL)
    {
        f_feat = fopen("assembled_features.f32", "wb");
    }
#endif

    /*OPUS_CLEAR(buffer, 1);*/
    memset(buffer, 0, sizeof(buffer));

    num_subframes = psDec->nb_subfr;
    num_samples = num_subframes * 80;
    psFeatures = &psDec->osce.features;

    /* smooth bit count */
    psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
    numbits[0] = num_bits;
    numbits[1] = psFeatures->numbits_smooth;

    /* decoded samples scaled by 2^-15, placed behind the history */
    for (n = 0; n < num_samples; n++)
    {
        buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
    }
    OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);

    for (k = 0; k < num_subframes; k++)
    {
        pfeatures = features + k * OSCE_FEATURE_DIM;
        frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
        /* precaution: clear the whole vector; memset counts bytes, not floats */
        memset(pfeatures, 0, OSCE_FEATURE_DIM * sizeof(*pfeatures));

        /* clean spectrum from lpcs (update every other frame) */
        if (k % 2 == 0)
        {
            calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
        }
        else
        {
            /* reuse the values of the preceding (even) subframe */
            OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
        }

        /* noisy cepstrum from signal (update every other frame) */
        if (k % 2 == 0)
        {
            /* the 320-sample analysis window starts 160 samples before the subframe */
            calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
        }
        else
        {
            OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
        }

        /* pitch hangover and zero value replacement */
        periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);

        /* auto-correlation around pitch lag */
        calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);

        /* ltp */
        celt_assert(OSCE_LTP_LENGTH == LTP_ORDER);
        for (i = 0; i < OSCE_LTP_LENGTH; i++)
        {
            pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
        }

        /* frame gain */
        pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);

#ifdef WRITE_FEATURES
        /* skip the dump if the file could not be opened */
        if (f_feat != NULL)
        {
            fwrite(pfeatures, sizeof(*pfeatures), 93, f_feat);
        }
#endif
    }

    /* buffer update: keep the most recent OSCE_FEATURES_MAX_HISTORY samples */
    OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
}
441*a58d3d2aSXin Li 
442*a58d3d2aSXin Li 
osce_cross_fade_10ms(float * x_enhanced,float * x_in,int length)443*a58d3d2aSXin Li void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
444*a58d3d2aSXin Li {
445*a58d3d2aSXin Li     int i;
446*a58d3d2aSXin Li     celt_assert(length >= 160);
447*a58d3d2aSXin Li 
448*a58d3d2aSXin Li     for (i = 0; i < 160; i++)
449*a58d3d2aSXin Li     {
450*a58d3d2aSXin Li         x_enhanced[i] = osce_window[i] * x_enhanced[i] + (1.f - osce_window[i]) * x_in[i];
451*a58d3d2aSXin Li     }
452*a58d3d2aSXin Li 
453*a58d3d2aSXin Li 
454*a58d3d2aSXin Li }
455