xref: /aosp_15_r20/external/libopus/dnn/lpcnet.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2018 Mozilla */
2 /*
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions
5    are met:
6 
7    - Redistributions of source code must retain the above copyright
8    notice, this list of conditions and the following disclaimer.
9 
10    - Redistributions in binary form must reproduce the above copyright
11    notice, this list of conditions and the following disclaimer in the
12    documentation and/or other materials provided with the distribution.
13 
14    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include <math.h>
32 #include <stdio.h>
33 #include "nnet_data.h"
34 #include "nnet.h"
35 #include "common.h"
36 #include "arch.h"
37 #include "lpcnet.h"
38 #include "lpcnet_private.h"
39 #include "os_support.h"
40 
/* Coefficient applied to the previous output sample (deemph_mem): the
   synthesis loop subtracts PREEMPH*deemph_mem from preloaded samples and adds
   it back onto every generated sample. */
#define PREEMPH 0.85f

/* Not referenced by any code visible in this file. */
#define PDF_FLOOR 0.002

/* Length of the frame network input vector: NB_FEATURES raw features followed
   by the pitch embedding output. */
#define FRAME_INPUT_SIZE (NB_FEATURES + EMBED_PITCH_OUT_SIZE)
46 
47 
48 #if 0
/* Debug helper: writes the N values of x to stdout on a single line, each one
   formatted with "%f " and the line terminated by a newline. */
static void print_vector(float *x, int N)
{
    int idx = 0;
    while (idx < N) {
        printf("%f ", x[idx]);
        idx++;
    }
    printf("\n");
}
55 #endif
56 
57 #ifdef END2END
rc2lpc(float * lpc,const float * rc)58 void rc2lpc(float *lpc, const float *rc)
59 {
60   int i, j, k;
61   float tmp[LPC_ORDER];
62   float ntmp[LPC_ORDER] = {0.0};
63   OPUS_COPY(tmp, rc, LPC_ORDER);
64   for(i = 0; i < LPC_ORDER ; i++)
65     {
66         for(j = 0; j <= i-1; j++)
67         {
68             ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1];
69         }
70         for(k = 0; k <= i-1; k++)
71         {
72             tmp[k] = ntmp[k];
73         }
74     }
75   for(i = 0; i < LPC_ORDER ; i++)
76   {
77     lpc[i] = tmp[i];
78   }
79 }
80 
81 #endif
82 
/* Runs the per-frame (conditioning) part of the network for one feature vector.

   lpcnet:          state; the conv layer states, the old_lpc history (when
                    FEATURES_DELAY>0) and frame_count are updated.
   gru_a_condition: output, conditioning vector produced by gru_a_dense_feature.
   gru_b_condition: output, conditioning vector produced by gru_b_dense_feature.
   lpc:             output, LPC_ORDER prediction coefficients for this frame.
   features:        input, at least NB_FEATURES values; features[NB_BANDS] is
                    used to derive the pitch index. */
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features)
{
    float frame_in[FRAME_INPUT_SIZE];
    float conv1_out[FEATURE_CONV1_OUT_SIZE];
    float conv2_out[FEATURE_CONV2_OUT_SIZE];
    float dense1_out[FEATURE_DENSE1_OUT_SIZE];
    float condition[FEATURE_DENSE2_OUT_SIZE];
#ifdef END2END
    float rc[LPC_ORDER];
#endif
    NNetState *net = &lpcnet->nnet;
    int pitch;

    /* Matches the Python code -- the 0.1 avoids rounding issues. */
    pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
    /* Keep the pitch index inside [33, 255]. */
    pitch = IMIN(255, IMAX(33, pitch));

    /* Network input: the raw features followed by the pitch embedding. */
    OPUS_COPY(frame_in, features, NB_FEATURES);
    compute_embedding(&lpcnet->model.embed_pitch, &frame_in[NB_FEATURES], pitch);

    /* Two conv layers; each output is zeroed until enough frames were seen. */
    compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, frame_in);
    if (lpcnet->frame_count < FEATURE_CONV1_DELAY) {
        OPUS_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
    }
    compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
    if (lpcnet->frame_count < FEATURES_DELAY) {
        OPUS_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
    }

    /* Two dense layers produce the shared conditioning vector. */
    _lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
    _lpcnet_compute_dense(&lpcnet->model.feature_dense2, condition, dense1_out);

    /* Per-GRU conditioning derived from the shared vector. */
    _lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, condition);
    _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);

#ifdef END2END
    /* The first LPC_ORDER conditioning values are converted to LPCs. */
    OPUS_COPY(rc, condition, LPC_ORDER);
    rc2lpc(lpc, rc);
#elif FEATURES_DELAY>0
    /* Output the oldest stored LPC set, shift the history by one slot and
       store the set computed from the current features in slot 0. */
    memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
    memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
    lpc_from_cepstrum(lpcnet->old_lpc[0], features);
#else
    lpc_from_cepstrum(lpc, features);
#endif
#ifdef LPC_GAMMA
    lpc_weighting(lpc, LPC_GAMMA);
#endif
    /* Frame counter saturates at 1000. */
    if (lpcnet->frame_count < 1000) {
        lpcnet->frame_count++;
    }
}
122 
/* Stores one feature frame for later processing instead of running the frame
   network on it right away.

   The buffer holds at most kernel_size(conv1) + kernel_size(conv2) - 2 frames
   of NB_FEATURES values each. Once it is full, the oldest frame is dropped to
   make room for the new one. The stored frames are consumed by
   run_frame_network_flush().

   lpcnet:   state owning feature_buffer and feature_buffer_fill.
   features: NB_FEATURES values to append. */
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)
{
    int max_buffer_size = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;
    celt_assert(max_buffer_size <= MAX_FEATURE_BUFFER_SIZE);
    /* With a capacity of zero there is nothing to keep. Without this guard the
       shift below would get a negative element count and the copy would land
       NB_FEATURES entries before the start of the buffer. */
    if (max_buffer_size <= 0) {
        return;
    }
    if (lpcnet->feature_buffer_fill == max_buffer_size) {
        /* Full: discard the oldest frame by shifting the others down one slot. */
        OPUS_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES],  (max_buffer_size-1)*NB_FEATURES);
    } else {
        lpcnet->feature_buffer_fill++;
    }
    /* The new frame always goes into the last occupied slot. */
    OPUS_COPY(&lpcnet->feature_buffer[(lpcnet->feature_buffer_fill-1)*NB_FEATURES], features, NB_FEATURES);
}
134 
/* Runs the frame network over every buffered feature frame, oldest first, and
   then empties the buffer. The LPC and conditioning outputs of these runs go
   to local scratch arrays and are discarded; only the updates that
   run_frame_network() makes to lpcnet are kept. */
void run_frame_network_flush(LPCNetState *lpcnet)
{
    float scratch_lpc[LPC_ORDER];
    float scratch_gru_a[3*GRU_A_STATE_SIZE];
    float scratch_gru_b[3*GRU_B_STATE_SIZE];
    const float *frame = &lpcnet->feature_buffer[0];
    int remaining;
    /* run_frame_network() does not change feature_buffer_fill, so the count
       can be read once up front. */
    for (remaining = lpcnet->feature_buffer_fill; remaining > 0; remaining--) {
        run_frame_network(lpcnet, scratch_gru_a, scratch_gru_b, scratch_lpc, frame);
        frame += NB_FEATURES;
    }
    lpcnet->feature_buffer_fill = 0;
}
146 
/* Runs the per-sample part of the network once and returns the value drawn by
   sample_mdense().

   lpcnet:               state; gru_a_state and gru_b_state are updated.
   gru_a_condition:      3*GRU_A_STATE_SIZE conditioning values for GRU A.
   gru_b_condition:      3*GRU_B_STATE_SIZE conditioning values for GRU B.
   last_exc, last_sig,
   pred:                 indices fed to the exc/sig/pred embeddings of GRU A.
   sampling_logit_table: table forwarded to sample_mdense().
   rng:                  random generator state forwarded to sample_mdense(). */
int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
{
    float gru_a_input[3*GRU_A_STATE_SIZE];
    float gru_b_input[3*GRU_B_STATE_SIZE];
    float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
    NNetState *net = &lpcnet->nnet;

    /* GRU B starts from its frame-level conditioning. */
    OPUS_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);

    /* GRU A: conditioning combined with the three embeddings, then one step. */
    compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
    compute_sparse_gru(&lpcnet->model.sparse_gru_a, net->gru_a_state, gru_a_input);

    /* GRU B takes the fresh GRU A state as its input. */
    OPUS_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
    compute_gruB(&lpcnet->model.gru_b, gru_b_input, net->gru_b_state, in_b);

    return sample_mdense(&lpcnet->model.dual_fc, net->gru_b_state, sampling_logit_table, rng);
}
169 
lpcnet_get_size()170 int lpcnet_get_size()
171 {
172     return sizeof(LPCNetState);
173 }
174 
/* Resets the run-time part of the state.

   Every byte from the LPCNET_RESET_START member up to the end of the struct
   is zeroed; members located before it are left alone. Afterwards last_exc is
   set to the mu-law code of 0 and the random generator is re-seeded from the
   fixed string "LPCNet". */
void lpcnet_reset(LPCNetState *lpcnet)
{
    const char *rng_string = "LPCNet";
    char *reset_begin = (char*)&lpcnet->LPCNET_RESET_START;
    char *state_end = (char*)lpcnet + sizeof(LPCNetState);

    OPUS_CLEAR(reset_begin, state_end - reset_begin);
    kiss99_srand(&lpcnet->rng, (const unsigned char *)rng_string, strlen(rng_string));
    lpcnet->last_exc = lin2ulaw(0.f);
}
184 
/* Initializes a caller-provided LPCNetState.

   Fills the 256-entry sampling logit table, loads the built-in model unless
   USE_WEIGHTS_FILE is defined, and resets the run-time state.

   Returns the result of init_lpcnet_model() (0 is the value asserted for
   success), or 0 when USE_WEIGHTS_FILE is defined. */
int lpcnet_init(LPCNetState *lpcnet)
{
    int ret = 0;
    int idx;
    /* Entry idx is log(p/(1-p)) for p running linearly from 0.025 to 0.975. */
    for (idx = 0; idx < 256; idx++) {
        float prob = .025f+.95f*idx/255.f;
        lpcnet->sampling_logit_table[idx] = -log((1-prob)/prob);
    }
#ifndef USE_WEIGHTS_FILE
    ret = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
#endif
    lpcnet_reset(lpcnet);
    celt_assert(ret == 0);
    return ret;
}
202 
/* Loads model weights from a memory blob into st->model.

   st:   state whose model is (re)initialized.
   data: serialized weights, len bytes long.

   Returns 0 on success and -1 if the blob yields no weight list or if
   init_lpcnet_model() rejects it. */
int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len) {
  /* Start from NULL so that a parser that bails out without producing a list
     is detected below instead of leaving an indeterminate pointer. */
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* NOTE(review): assumes parse_weights() leaves *list NULL when it fails;
     confirm against its implementation. */
  if (list == NULL) return -1;
  ret = init_lpcnet_model(&st->model, list);
  /* The list is only needed during initialization. */
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
212 
213 
lpcnet_create()214 LPCNetState *lpcnet_create()
215 {
216     LPCNetState *lpcnet;
217     lpcnet = (LPCNetState *)opus_alloc(lpcnet_get_size(), 1);
218     OPUS_CLEAR(lpcnet, 1);
219     lpcnet_init(lpcnet);
220     return lpcnet;
221 }
222 
/* Releases a state obtained from lpcnet_create() by passing it to
   opus_free(). The pointer must not be used afterwards. */
void lpcnet_destroy(LPCNetState *lpcnet)
{
    opus_free(lpcnet);
}
227 
/* Clears the sample-level signal memory only: both GRU states, the last_sig
   history, the de-emphasis memory, and last_exc (set to the mu-law code of 0).
   Nothing else in the state is touched. */
void lpcnet_reset_signal(LPCNetState *lpcnet)
{
    OPUS_CLEAR(lpcnet->nnet.gru_a_state, GRU_A_STATE_SIZE);
    OPUS_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
    OPUS_CLEAR(lpcnet->last_sig, LPC_ORDER);
    lpcnet->last_exc = lin2ulaw(0.f);
    lpcnet->deemph_mem = 0;
}
236 
/* Generates N samples from the conditioning and LPCs already stored in lpcnet.

   lpcnet:  state holding lpc, gru_a_condition, gru_b_condition and the signal
            memory (last_sig, last_exc, deemph_mem), all updated per sample.
   output:  N samples. The first `preload` entries are inputs: they are read
            and used to drive the signal memory, and are not overwritten. The
            remaining entries receive the synthesized signal.
   N:       number of samples to process.
   preload: number of leading samples of output that are already known.

   While frame_count <= FEATURES_DELAY the whole output is zeroed and nothing
   else happens. */
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload)
{
    int pos;

    if (lpcnet->frame_count <= FEATURES_DELAY)
    {
        OPUS_CLEAR(output, N);
        return;
    }
    for (pos = 0; pos < N; pos++)
    {
        const int known = pos < preload;
        float pred = 0;
        float pcm;
        int tap;
        int exc;
        int sig_code;
        int pred_code;

        /* Linear prediction from the previous LPC_ORDER samples. */
        for (tap = 0; tap < LPC_ORDER; tap++) pred -= lpcnet->last_sig[tap]*lpcnet->lpc[tap];
        sig_code = lin2ulaw(lpcnet->last_sig[0]);
        pred_code = lin2ulaw(pred);

        /* The network runs for every sample, known or not, so its state
           advances the same way in both cases. */
        exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, sig_code, pred_code, lpcnet->sampling_logit_table, &lpcnet->rng);
        if (known) {
            /* Known sample: replace the sampled excitation with the one
               implied by the given output value. */
            exc = lin2ulaw(output[pos]-PREEMPH*lpcnet->deemph_mem - pred);
            pcm = output[pos]-PREEMPH*lpcnet->deemph_mem;
        } else {
            pcm = pred + ulaw2lin(exc);
        }

        /* Push the new sample into the prediction history. */
        OPUS_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
        lpcnet->last_sig[0] = pcm;
        lpcnet->last_exc = exc;

        /* The filter memory keeps the value from before clipping. */
        pcm += PREEMPH*lpcnet->deemph_mem;
        lpcnet->deemph_mem = pcm;

        if (pcm<-32767) {
            pcm = -32767;
        } else if (pcm>32767) {
            pcm = 32767;
        }
        if (!known) output[pos] = (int)floor(.5 + pcm);
    }
}
274 
/* Processes one frame: runs the frame network on `features`, storing the
   conditioning vectors and LPCs in lpcnet, then produces N samples in
   `output` with lpcnet_synthesize_tail_impl(). `preload` is forwarded
   unchanged to that function. */
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload)
{
    float *cond_a = lpcnet->gru_a_condition;
    float *cond_b = lpcnet->gru_b_condition;
    run_frame_network(lpcnet, cond_a, cond_b, lpcnet->lpc, features);
    lpcnet_synthesize_tail_impl(lpcnet, output, N, preload);
}
280 
/* Synthesizes N samples into `output` for one frame of `features`, with no
   preloaded samples (preload = 0). */
void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N)
{
    const int no_preload = 0;
    lpcnet_synthesize_impl(lpcnet, features, output, N, no_preload);
}
284