1 /* Copyright (c) 2018 Mozilla */
2 /*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
18 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <math.h>
32 #include <stdio.h>
33 #include "nnet_data.h"
34 #include "nnet.h"
35 #include "common.h"
36 #include "arch.h"
37 #include "lpcnet.h"
38 #include "lpcnet_private.h"
39 #include "os_support.h"
40
/* Pre-emphasis coefficient: the synthesis loop subtracts PREEMPH times the
   previous output sample from preloaded samples and adds it back
   (de-emphasis) before each PCM value is written out. */
#define PREEMPH 0.85f

/* NOTE(review): not referenced in the visible part of this file; presumably a
   floor applied to sampling probabilities elsewhere -- confirm before use. */
#define PDF_FLOOR 0.002

/* Length of the frame-network input vector: the NB_FEATURES input features
   followed by the pitch embedding output. */
#define FRAME_INPUT_SIZE (NB_FEATURES + EMBED_PITCH_OUT_SIZE)
46
47
#if 0
/* Debug helper (compiled out): prints the N values of x on a single line,
   each formatted with "%f" and followed by a space, then a newline. */
static void print_vector(float *x, int N)
{
    int idx = 0;
    while (idx < N) {
        printf("%f ", x[idx]);
        idx++;
    }
    printf("\n");
}
#endif
56
57 #ifdef END2END
rc2lpc(float * lpc,const float * rc)58 void rc2lpc(float *lpc, const float *rc)
59 {
60 int i, j, k;
61 float tmp[LPC_ORDER];
62 float ntmp[LPC_ORDER] = {0.0};
63 OPUS_COPY(tmp, rc, LPC_ORDER);
64 for(i = 0; i < LPC_ORDER ; i++)
65 {
66 for(j = 0; j <= i-1; j++)
67 {
68 ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1];
69 }
70 for(k = 0; k <= i-1; k++)
71 {
72 tmp[k] = ntmp[k];
73 }
74 }
75 for(i = 0; i < LPC_ORDER ; i++)
76 {
77 lpc[i] = tmp[i];
78 }
79 }
80
81 #endif
82
/* Runs the frame-rate network on one feature frame.
   lpcnet:          synthesis state; its conv states, LPC history and
                    frame_count are updated.
   gru_a_condition: output, conditioning vector for GRU A.
   gru_b_condition: output, conditioning vector for GRU B.
   lpc:             output, LPC_ORDER prediction coefficients for the frame.
   features:        input feature frame; features[NB_BANDS] carries the pitch
                    parameter. */
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features)
{
    float frame_in[FRAME_INPUT_SIZE];
    float conv1_out[FEATURE_CONV1_OUT_SIZE];
    float conv2_out[FEATURE_CONV2_OUT_SIZE];
    float dense1_out[FEATURE_DENSE1_OUT_SIZE];
    float cond[FEATURE_DENSE2_OUT_SIZE];
    float rc[LPC_ORDER];
    NNetState *net = &lpcnet->nnet;
    int pitch;

    /* Matches the Python code -- the 0.1 avoids rounding issues. */
    pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
    /* Keep the embedding index inside [33, 255]. */
    pitch = IMIN(255, IMAX(33, pitch));

    /* Network input = raw features followed by the pitch embedding. */
    OPUS_COPY(frame_in, features, NB_FEATURES);
    compute_embedding(&lpcnet->model.embed_pitch, &frame_in[NB_FEATURES], pitch);

    /* Two convolutions; their outputs are zeroed for the first frames, while
       frame_count is still below the corresponding delay. */
    compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, frame_in);
    if (lpcnet->frame_count < FEATURE_CONV1_DELAY) {
        OPUS_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
    }
    compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
    if (lpcnet->frame_count < FEATURES_DELAY) {
        OPUS_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
    }

    _lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
    _lpcnet_compute_dense(&lpcnet->model.feature_dense2, cond, dense1_out);
    /* The first LPC_ORDER conditioning values are what rc2lpc() consumes in
       END2END builds. */
    OPUS_COPY(rc, cond, LPC_ORDER);
    _lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, cond);
    _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, cond);

#ifdef END2END
    rc2lpc(lpc, rc);
#elif FEATURES_DELAY>0
    /* Output the oldest stored LPC set, shift the history by one slot, and
       store the coefficients derived from the current features in slot 0. */
    memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
    memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
    lpc_from_cepstrum(lpcnet->old_lpc[0], features);
#else
    lpc_from_cepstrum(lpc, features);
#endif
#ifdef LPC_GAMMA
    lpc_weighting(lpc, LPC_GAMMA);
#endif
    /* frame_count stops increasing once it reaches 1000. */
    if (lpcnet->frame_count < 1000) {
        lpcnet->frame_count++;
    }
}
122
/* Stores one feature frame in lpcnet->feature_buffer instead of running the
   frame network right away; run_frame_network_flush() later replays the
   stored frames.
   The buffer holds at most kernel_size(conv1) + kernel_size(conv2) - 2
   frames. Once it is full, the oldest frame is dropped to make room.
   lpcnet:   synthesis state owning the feature buffer.
   features: NB_FEATURES values to store. */
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)
{
    int max_buffer_size = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;
    celt_assert(max_buffer_size <= MAX_FEATURE_BUFFER_SIZE);
    /* With both kernel sizes equal to 1 there is no history to keep. Without
       this guard the code below would use a negative move count and write
       before the start of the buffer. */
    if (max_buffer_size <= 0) return;
    if (lpcnet->feature_buffer_fill >= max_buffer_size) {
        /* ">=" rather than "==": a stale fill level above the current bound
           must not keep growing. Clamp it for memory safety, then drop the
           oldest frame. */
        lpcnet->feature_buffer_fill = max_buffer_size;
        OPUS_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES], (max_buffer_size-1)*NB_FEATURES);
    } else {
        lpcnet->feature_buffer_fill++;
    }
    /* The new frame always goes into the last occupied slot. */
    OPUS_COPY(&lpcnet->feature_buffer[(lpcnet->feature_buffer_fill-1)*NB_FEATURES], features, NB_FEATURES);
}
134
/* Feeds every frame stored in lpcnet->feature_buffer through
   run_frame_network(), oldest first, then marks the buffer empty.
   The per-frame outputs go to local scratch arrays and are discarded. */
void run_frame_network_flush(LPCNetState *lpcnet)
{
    float scratch_lpc[LPC_ORDER];
    float scratch_a[3*GRU_A_STATE_SIZE];
    float scratch_b[3*GRU_B_STATE_SIZE];
    int frame = 0;
    while (frame < lpcnet->feature_buffer_fill) {
        run_frame_network(lpcnet, scratch_a, scratch_b, scratch_lpc, &lpcnet->feature_buffer[frame*NB_FEATURES]);
        frame++;
    }
    lpcnet->feature_buffer_fill = 0;
}
146
/* Runs the sample-rate network for one sample and returns the value produced
   by sample_mdense().
   lpcnet:               synthesis state; gru_a_state and gru_b_state are
                         updated.
   gru_a_condition:      per-frame conditioning for GRU A.
   gru_b_condition:      per-frame conditioning for GRU B.
   last_exc, last_sig,
   pred:                 indices passed to the GRU A embeddings (excitation,
                         signal, prediction).
   sampling_logit_table: table forwarded to sample_mdense().
   rng:                  random generator state forwarded to sample_mdense(). */
int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
{
    float gru_a_input[3*GRU_A_STATE_SIZE];
    float gru_b_input[3*GRU_B_STATE_SIZE];
    float gru_b_in[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
    NNetState *state = &lpcnet->nnet;

    /* GRU A input: the frame conditioning combined with the three embeddings. */
    compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
    compute_sparse_gru(&lpcnet->model.sparse_gru_a, state->gru_a_state, gru_a_input);

    /* GRU B is driven by the fresh GRU A state; only the first
       GRU_A_STATE_SIZE entries of gru_b_in are written here. */
    OPUS_COPY(gru_b_in, state->gru_a_state, GRU_A_STATE_SIZE);
    OPUS_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
    compute_gruB(&lpcnet->model.gru_b, gru_b_input, state->gru_b_state, gru_b_in);

    return sample_mdense(&lpcnet->model.dual_fc, state->gru_b_state, sampling_logit_table, rng);
}
169
lpcnet_get_size()170 int lpcnet_get_size()
171 {
172 return sizeof(LPCNetState);
173 }
174
/* Resets the run-time part of the state: zeroes every byte from the
   LPCNET_RESET_START member to the end of the struct, sets last_exc to the
   mu-law code of 0, and reseeds the random generator with the fixed string
   "LPCNet". Members located before LPCNET_RESET_START are left untouched. */
void lpcnet_reset(LPCNetState *lpcnet)
{
    const char *seed = "LPCNet";
    char *base = (char*)lpcnet;
    char *reset_from = (char*)&lpcnet->LPCNET_RESET_START;

    /* Byte count = struct size minus the offset of the first member to clear. */
    OPUS_CLEAR(reset_from, sizeof(LPCNetState)-(reset_from - base));
    lpcnet->last_exc = lin2ulaw(0.f);
    kiss99_srand(&lpcnet->rng, (const unsigned char *)seed, strlen(seed));
}
184
/* Initializes a caller-allocated state: fills the 256-entry sampling logit
   table, loads the built-in model unless USE_WEIGHTS_FILE is defined, and
   resets the run-time state.
   Returns the result of init_lpcnet_model(), or 0 when USE_WEIGHTS_FILE is
   defined. A non-zero result trips celt_assert(). */
int lpcnet_init(LPCNetState *lpcnet)
{
    int level;
    int ret;

    /* Entry `level` is the logit of a probability spread linearly over
       [0.025, 0.975]. */
    for (level=0;level<256;level++) {
        float prob = .025f+.95f*level/255.f;
        lpcnet->sampling_logit_table[level] = -log((1-prob)/prob);
    }
#ifdef USE_WEIGHTS_FILE
    /* No built-in weights are loaded in this configuration. */
    ret = 0;
#else
    ret = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
#endif
    lpcnet_reset(lpcnet);
    celt_assert(ret == 0);
    return ret;
}
202
/* Loads model weights from an in-memory blob into st->model.
   st:   state whose model is (re)initialized.
   data: serialized weights, len bytes long.
   Returns 0 on success, -1 if the blob cannot be parsed or the model cannot
   be initialized from it. The temporary array list is always released. */
int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len) {
    WeightArray *list = NULL;
    int ret;

    /* Do not hand a missing list to init_lpcnet_model().
       NOTE(review): assumes parse_weights() reports failure with a negative
       return value and/or by leaving the list NULL -- confirm against its
       definition. */
    ret = parse_weights(&list, data, len);
    if (ret < 0 || list == NULL) {
        opus_free(list);
        return -1;
    }
    ret = init_lpcnet_model(&st->model, list);
    opus_free(list);
    return (ret == 0) ? 0 : -1;
}
212
213
lpcnet_create()214 LPCNetState *lpcnet_create()
215 {
216 LPCNetState *lpcnet;
217 lpcnet = (LPCNetState *)opus_alloc(lpcnet_get_size(), 1);
218 OPUS_CLEAR(lpcnet, 1);
219 lpcnet_init(lpcnet);
220 return lpcnet;
221 }
222
/* Releases a state by passing it to opus_free(). The pointer must not be used
   afterwards. */
void lpcnet_destroy(LPCNetState *lpcnet)
{
    opus_free(lpcnet);
}
227
/* Clears only the signal-domain memory: the two GRU states, the last_sig
   history, the de-emphasis memory and the last excitation (set to the mu-law
   code of 0). Nothing else in the state is modified. */
void lpcnet_reset_signal(LPCNetState *lpcnet)
{
    /* Recurrent network state. */
    OPUS_CLEAR(lpcnet->nnet.gru_a_state, GRU_A_STATE_SIZE);
    OPUS_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
    /* Synthesis filter memories. */
    OPUS_CLEAR(lpcnet->last_sig, LPC_ORDER);
    lpcnet->deemph_mem = 0;
    lpcnet->last_exc = lin2ulaw(0.f);
}
236
/* Synthesizes N samples with the conditioning and LPC coefficients currently
   stored in the state.
   lpcnet:  synthesis state (signal history, GRU states, rng are updated).
   output:  N samples. The first `preload` entries are read as already-known
            signal and left unmodified; the remaining ones are written.
   N:       number of samples to process.
   preload: number of leading samples taken from output rather than generated.
   While frame_count is still <= FEATURES_DELAY, output is zeroed and nothing
   is synthesized. */
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload)
{
    int n;

    if (lpcnet->frame_count <= FEATURES_DELAY)
    {
        OPUS_CLEAR(output, N);
        return;
    }
    for (n=0;n<N;n++)
    {
        int tap;
        int exc;
        int sig_ulaw;
        int pred_ulaw;
        float pcm;
        float pred = 0;

        /* Linear prediction from the last LPC_ORDER samples. */
        for (tap=0;tap<LPC_ORDER;tap++) pred -= lpcnet->last_sig[tap]*lpcnet->lpc[tap];
        sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
        pred_ulaw = lin2ulaw(pred);
        /* The network is always run, so its state advances for preloaded
           samples too. */
        exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
        if (n < preload) {
            /* Known sample: replace the sampled excitation with the one
               implied by the pre-emphasized input. */
            exc = lin2ulaw(output[n]-PREEMPH*lpcnet->deemph_mem - pred);
            pcm = output[n]-PREEMPH*lpcnet->deemph_mem;
        } else {
            pcm = pred + ulaw2lin(exc);
        }
        /* Push the new sample into the prediction history. */
        OPUS_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
        lpcnet->last_sig[0] = pcm;
        lpcnet->last_exc = exc;
        /* De-emphasis; the filter memory keeps the unclipped value. */
        pcm += PREEMPH*lpcnet->deemph_mem;
        lpcnet->deemph_mem = pcm;
        if (pcm<-32767) {
            pcm = -32767;
        } else if (pcm>32767) {
            pcm = 32767;
        }
        if (n >= preload) output[n] = (int)floor(.5 + pcm);
    }
}
274
/* Processes one feature frame end to end: runs the frame network to refresh
   the conditioning vectors and LPC coefficients held in the state, then
   synthesizes N samples into output.
   preload is forwarded unchanged to lpcnet_synthesize_tail_impl(). */
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload)
{
    /* Frame-rate update first; the sample loop reads what it stores. */
    run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features);
    lpcnet_synthesize_tail_impl(lpcnet, output, N, preload);
}
280
/* Synthesizes N samples into output from one feature frame, with no
   preloaded samples (preload = 0). */
void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N)
{
    lpcnet_synthesize_impl(lpcnet, features, output, N, 0);
}
284