1 /* Copyright (c) 2017-2019 Mozilla */
2 /*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
18 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include "kiss_fft.h"
35 #include "common.h"
36 #include <math.h>
37 #include "freq.h"
38 #include "pitch.h"
39 #include "arch.h"
40 #include <assert.h>
41 #include "lpcnet_private.h"
42 #include "lpcnet.h"
43 #include "os_support.h"
44 #include "_kiss_fft_guts.h"
45 #include "celt_lpc.h"
46 #include "mathops.h"
47
48
lpcnet_encoder_get_size(void)49 int lpcnet_encoder_get_size(void) {
50 return sizeof(LPCNetEncState);
51 }
52
lpcnet_encoder_init(LPCNetEncState * st)53 int lpcnet_encoder_init(LPCNetEncState *st) {
54 memset(st, 0, sizeof(*st));
55 pitchdnn_init(&st->pitchdnn);
56 return 0;
57 }
58
lpcnet_encoder_load_model(LPCNetEncState * st,const void * data,int len)59 int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len) {
60 return pitchdnn_load_model(&st->pitchdnn, data, len);
61 }
62
lpcnet_encoder_create(void)63 LPCNetEncState *lpcnet_encoder_create(void) {
64 LPCNetEncState *st;
65 st = opus_alloc(lpcnet_encoder_get_size());
66 lpcnet_encoder_init(st);
67 return st;
68 }
69
lpcnet_encoder_destroy(LPCNetEncState * st)70 void lpcnet_encoder_destroy(LPCNetEncState *st) {
71 opus_free(st);
72 }
73
frame_analysis(LPCNetEncState * st,kiss_fft_cpx * X,float * Ex,const float * in)74 static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
75 float x[WINDOW_SIZE];
76 OPUS_COPY(x, st->analysis_mem, OVERLAP_SIZE);
77 OPUS_COPY(&x[OVERLAP_SIZE], in, FRAME_SIZE);
78 OPUS_COPY(st->analysis_mem, &in[FRAME_SIZE-OVERLAP_SIZE], OVERLAP_SIZE);
79 apply_window(x);
80 forward_transform(X, x);
81 lpcn_compute_band_energy(Ex, X);
82 }
83
biquad(float * y,float mem[2],const float * x,const float * b,const float * a,int N)84 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
85 int i;
86 float mem0, mem1;
87 mem0 = mem[0];
88 mem1 = mem[1];
89 for (i=0;i<N;i++) {
90 float xi, yi, mem00;
91 xi = x[i];
92 yi = x[i] + mem0;
93 mem00 = mem0;
94 /* Original code:
95 mem0 = mem1 + (b[0]*xi - a[0]*yi);
96 mem1 = (b[1]*xi - a[1]*yi);
97 Modified to reduce dependency chains: (the +1e-30f forces the ordering and has no effect on the output)
98 */
99 mem0 = (b[0]-a[0])*xi + mem1 - a[0]*mem0;
100 mem1 = (b[1]-a[1])*xi + 1e-30f - a[1]*mem00;
101 y[i] = yi;
102 }
103 mem[0] = mem0;
104 mem[1] = mem1;
105 }
106
107 #define celt_log10(x) (0.3010299957f*celt_log2(x))
108
compute_frame_features(LPCNetEncState * st,const float * in,int arch)109 void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
110 float aligned_in[FRAME_SIZE];
111 int i;
112 float Ly[NB_BANDS];
113 float follow, logMax;
114 kiss_fft_cpx X[FREQ_SIZE];
115 float Ex[NB_BANDS];
116 float xcorr[PITCH_MAX_PERIOD];
117 float ener0;
118 float ener;
119 float x[FRAME_SIZE+LPC_ORDER];
120 float frame_corr;
121 float xy, xx, yy;
122 int pitch;
123 float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
124 /* [b,a]=ellip(2, 2, 20, 1200/8000); */
125 static const float lp_b[2] = {-0.84946f, 1.f};
126 static const float lp_a[2] = {-1.54220f, 0.70781f};
127 OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
128 frame_analysis(st, X, Ex, in);
129 st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
130 for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
131 kiss_fft_cpx prod;
132 float norm_1;
133 C_MULC(prod, X[i], st->prev_if[i]);
134 norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
135 C_MULBYSCALAR(prod, norm_1);
136 st->if_features[3*i-2] = prod.r;
137 st->if_features[3*i-1] = prod.i;
138 st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
139 }
140 OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
141 /*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
142 logMax = -2;
143 follow = -2;
144 for (i=0;i<NB_BANDS;i++) {
145 Ly[i] = celt_log10(1e-2f+Ex[i]);
146 Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
147 logMax = MAX16(logMax, Ly[i]);
148 follow = MAX16(follow-2.5f, Ly[i]);
149 }
150 dct(st->features, Ly);
151 st->features[0] -= 4;
152 lpc_from_cepstrum(st->lpc, st->features);
153 for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
154 OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
155 OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
156 OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
157 OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
158 OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
159 OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
160 celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
161 for (i=0;i<FRAME_SIZE;i++) {
162 st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
163 st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
164 /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
165 }
166 biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
167 {
168 double ener1;
169 float *buf = st->exc_buf;
170 celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
171 ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
172 ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
173 /*printf("%f\n", st->frame_weight[sub]);*/
174 for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
175 ener = 1 + ener0 + ener1;
176 st->xcorr_features[i] = 2*xcorr[i];
177 ener_norm[i] = ener;
178 ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
179 /*printf("%f ", st->xcorr_features[i]);*/
180 }
181 /* Split in a separate loop so the compiler can vectorize it */
182 for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
183 st->xcorr_features[i] /= ener_norm[i];
184 }
185 /*printf("\n");*/
186 }
187 st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
188 pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
189 xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
190 yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
191 xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
192 /*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
193 frame_corr = xy/sqrt(1+xx*yy);
194 frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
195 st->features[NB_BANDS] = st->dnn_pitch;
196 st->features[NB_BANDS + 1] = frame_corr-.5f;
197 }
198
preemphasis(float * y,float * mem,const float * x,float coef,int N)199 void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
200 int i;
201 for (i=0;i<N;i++) {
202 float yi;
203 yi = x[i] + *mem;
204 *mem = -coef*x[i];
205 y[i] = yi;
206 }
207 }
208
lpcnet_compute_single_frame_features_impl(LPCNetEncState * st,float * x,float features[NB_TOTAL_FEATURES],int arch)209 static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
210 preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
211 compute_frame_features(st, x, arch);
212 OPUS_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
213 return 0;
214 }
215
lpcnet_compute_single_frame_features(LPCNetEncState * st,const opus_int16 * pcm,float features[NB_TOTAL_FEATURES],int arch)216 int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
217 int i;
218 float x[FRAME_SIZE];
219 for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
220 lpcnet_compute_single_frame_features_impl(st, x, features, arch);
221 return 0;
222 }
223
lpcnet_compute_single_frame_features_float(LPCNetEncState * st,const float * pcm,float features[NB_TOTAL_FEATURES],int arch)224 int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
225 int i;
226 float x[FRAME_SIZE];
227 for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
228 lpcnet_compute_single_frame_features_impl(st, x, features, arch);
229 return 0;
230 }
231