xref: /aosp_15_r20/external/libopus/dnn/lpcnet_enc.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2017-2019 Mozilla */
2 /*
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions
5    are met:
6 
7    - Redistributions of source code must retain the above copyright
8    notice, this list of conditions and the following disclaimer.
9 
10    - Redistributions in binary form must reproduce the above copyright
11    notice, this list of conditions and the following disclaimer in the
12    documentation and/or other materials provided with the distribution.
13 
14    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include "kiss_fft.h"
35 #include "common.h"
36 #include <math.h>
37 #include "freq.h"
38 #include "pitch.h"
39 #include "arch.h"
40 #include <assert.h>
41 #include "lpcnet_private.h"
42 #include "lpcnet.h"
43 #include "os_support.h"
44 #include "_kiss_fft_guts.h"
45 #include "celt_lpc.h"
46 #include "mathops.h"
47 
48 
lpcnet_encoder_get_size(void)49 int lpcnet_encoder_get_size(void) {
50   return sizeof(LPCNetEncState);
51 }
52 
lpcnet_encoder_init(LPCNetEncState * st)53 int lpcnet_encoder_init(LPCNetEncState *st) {
54   memset(st, 0, sizeof(*st));
55   pitchdnn_init(&st->pitchdnn);
56   return 0;
57 }
58 
lpcnet_encoder_load_model(LPCNetEncState * st,const void * data,int len)59 int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len) {
60   return pitchdnn_load_model(&st->pitchdnn, data, len);
61 }
62 
lpcnet_encoder_create(void)63 LPCNetEncState *lpcnet_encoder_create(void) {
64   LPCNetEncState *st;
65   st = opus_alloc(lpcnet_encoder_get_size());
66   lpcnet_encoder_init(st);
67   return st;
68 }
69 
lpcnet_encoder_destroy(LPCNetEncState * st)70 void lpcnet_encoder_destroy(LPCNetEncState *st) {
71   opus_free(st);
72 }
73 
frame_analysis(LPCNetEncState * st,kiss_fft_cpx * X,float * Ex,const float * in)74 static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
75   float x[WINDOW_SIZE];
76   OPUS_COPY(x, st->analysis_mem, OVERLAP_SIZE);
77   OPUS_COPY(&x[OVERLAP_SIZE], in, FRAME_SIZE);
78   OPUS_COPY(st->analysis_mem, &in[FRAME_SIZE-OVERLAP_SIZE], OVERLAP_SIZE);
79   apply_window(x);
80   forward_transform(X, x);
81   lpcn_compute_band_energy(Ex, X);
82 }
83 
biquad(float * y,float mem[2],const float * x,const float * b,const float * a,int N)84 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
85   int i;
86   float mem0, mem1;
87   mem0 = mem[0];
88   mem1 = mem[1];
89   for (i=0;i<N;i++) {
90     float xi, yi, mem00;
91     xi = x[i];
92     yi = x[i] + mem0;
93     mem00 = mem0;
94     /* Original code:
95     mem0 = mem1 + (b[0]*xi - a[0]*yi);
96     mem1 = (b[1]*xi - a[1]*yi);
97     Modified to reduce dependency chains: (the +1e-30f forces the ordering and has no effect on the output)
98     */
99     mem0 = (b[0]-a[0])*xi + mem1 - a[0]*mem0;
100     mem1 = (b[1]-a[1])*xi + 1e-30f - a[1]*mem00;
101     y[i] = yi;
102   }
103   mem[0] = mem0;
104   mem[1] = mem1;
105 }
106 
107 #define celt_log10(x) (0.3010299957f*celt_log2(x))
108 
compute_frame_features(LPCNetEncState * st,const float * in,int arch)109 void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
110   float aligned_in[FRAME_SIZE];
111   int i;
112   float Ly[NB_BANDS];
113   float follow, logMax;
114   kiss_fft_cpx X[FREQ_SIZE];
115   float Ex[NB_BANDS];
116   float xcorr[PITCH_MAX_PERIOD];
117   float ener0;
118   float ener;
119   float x[FRAME_SIZE+LPC_ORDER];
120   float frame_corr;
121   float xy, xx, yy;
122   int pitch;
123   float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
124   /* [b,a]=ellip(2, 2, 20, 1200/8000); */
125   static const float lp_b[2] = {-0.84946f, 1.f};
126   static const float lp_a[2] = {-1.54220f, 0.70781f};
127   OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
128   frame_analysis(st, X, Ex, in);
129   st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
130   for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
131     kiss_fft_cpx prod;
132     float norm_1;
133     C_MULC(prod, X[i], st->prev_if[i]);
134     norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
135     C_MULBYSCALAR(prod, norm_1);
136     st->if_features[3*i-2] = prod.r;
137     st->if_features[3*i-1] = prod.i;
138     st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
139   }
140   OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
141   /*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
142   logMax = -2;
143   follow = -2;
144   for (i=0;i<NB_BANDS;i++) {
145     Ly[i] = celt_log10(1e-2f+Ex[i]);
146     Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
147     logMax = MAX16(logMax, Ly[i]);
148     follow = MAX16(follow-2.5f, Ly[i]);
149   }
150   dct(st->features, Ly);
151   st->features[0] -= 4;
152   lpc_from_cepstrum(st->lpc, st->features);
153   for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
154   OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
155   OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
156   OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
157   OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
158   OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
159   OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
160   celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
161   for (i=0;i<FRAME_SIZE;i++) {
162     st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
163     st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
164     /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
165   }
166   biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
167   {
168     double ener1;
169     float *buf = st->exc_buf;
170     celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
171     ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
172     ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
173     /*printf("%f\n", st->frame_weight[sub]);*/
174     for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
175       ener = 1 + ener0 + ener1;
176       st->xcorr_features[i] = 2*xcorr[i];
177       ener_norm[i] = ener;
178       ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
179       /*printf("%f ", st->xcorr_features[i]);*/
180     }
181     /* Split in a separate loop so the compiler can vectorize it */
182     for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
183       st->xcorr_features[i] /= ener_norm[i];
184     }
185     /*printf("\n");*/
186   }
187   st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
188   pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
189   xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
190   yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
191   xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
192   /*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
193   frame_corr = xy/sqrt(1+xx*yy);
194   frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
195   st->features[NB_BANDS] = st->dnn_pitch;
196   st->features[NB_BANDS + 1] = frame_corr-.5f;
197 }
198 
preemphasis(float * y,float * mem,const float * x,float coef,int N)199 void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
200   int i;
201   for (i=0;i<N;i++) {
202     float yi;
203     yi = x[i] + *mem;
204     *mem = -coef*x[i];
205     y[i] = yi;
206   }
207 }
208 
lpcnet_compute_single_frame_features_impl(LPCNetEncState * st,float * x,float features[NB_TOTAL_FEATURES],int arch)209 static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
210   preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
211   compute_frame_features(st, x, arch);
212   OPUS_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
213   return 0;
214 }
215 
lpcnet_compute_single_frame_features(LPCNetEncState * st,const opus_int16 * pcm,float features[NB_TOTAL_FEATURES],int arch)216 int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
217   int i;
218   float x[FRAME_SIZE];
219   for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
220   lpcnet_compute_single_frame_features_impl(st, x, features, arch);
221   return 0;
222 }
223 
lpcnet_compute_single_frame_features_float(LPCNetEncState * st,const float * pcm,float features[NB_TOTAL_FEATURES],int arch)224 int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
225   int i;
226   float x[FRAME_SIZE];
227   for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
228   lpcnet_compute_single_frame_features_impl(st, x, features, arch);
229   return 0;
230 }
231