1 /* Copyright (c) 2023 Amazon */
2 /*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
18 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include "fwgan.h"
32 #include "os_support.h"
33 #include "freq.h"
34 #include "fwgan_data.h"
35 #include "lpcnet.h"
36 #include "pitch.h"
37 #include "nnet.h"
38 #include "lpcnet_private.h"
39
/* Size of the per-subframe feat_in buffer built in run_fwgan_subframe(): one quarter of the
   upsampler output followed/preceded by FWGAN_FRAME_SIZE/2 pitch-embedding values. */
#define FEAT_IN_SIZE (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 + FWGAN_FRAME_SIZE/2)

/* Length of the feature vector handed to the upsampler (one less than NB_FEATURES); it is
   asserted to match the upsampler's input count in run_fwgan_upsampler(). */
#define FWGAN_FEATURES (NB_FEATURES-1)
43
pitch_embeddings(float * pembed,float * phase,double w0)44 static void pitch_embeddings(float *pembed, float *phase, double w0) {
45 int i;
46 float wreal, wimag;
47 #if 1
48 /* This Taylor expansion should be good enough since w0 is always small. */
49 float w2 = w0*w0;
50 wreal = 1 - .5*w2*(1.f - 0.083333333f*w2);
51 wimag = w0*(1 - 0.166666667f*w2*(1.f - 0.05f*w2));
52 #else
53 wreal = cos(w0);
54 wimag = sin(w0);
55 #endif
56 /* Speed-up phase reference by making phase a unit-norm complex value and rotating it
57 by exp(-i*w0) each sample. */
58 for (i=0;i<SUBFRAME_SIZE;i++) {
59 float tmp;
60 tmp = phase[0]*wreal - phase[1]*wimag;
61 phase[1] = phase[0]*wimag + phase[1]*wreal;
62 phase[0] = tmp;
63 pembed[i] = phase[1];
64 pembed[SUBFRAME_SIZE+i] = phase[0];
65 }
66 /* Renormalize once per sub-frame, though we could probably do it even less frequently. */
67 {
68 float r = 1.f/sqrt(phase[0]*phase[0] + phase[1]*phase[1]);
69 phase[0] *= r;
70 phase[1] *= r;
71 }
72 }
73
compute_wlpc(float lpc[LPC_ORDER],const float * features)74 static void compute_wlpc(float lpc[LPC_ORDER], const float *features) {
75 float lpc_weight;
76 int i;
77 lpc_from_cepstrum(lpc, features);
78 lpc_weight = 1.f;
79 for (i=0;i<LPC_ORDER;i++) {
80 lpc_weight *= FWGAN_GAMMA;
81 lpc[i] *= lpc_weight;
82 }
83 }
84
/* Run the bfcc_with_corr_upsampler_fc dense layer (tanh activation) on the feature vector.
   features must hold FWGAN_FEATURES values; cond receives
   BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE values. Both sizes are asserted against the model. */
static void run_fwgan_upsampler(FWGANState *st, float *cond, const float *features)
{
  FWGAN *model = &st->model;
  celt_assert(FWGAN_FEATURES == model->bfcc_with_corr_upsampler_fc.nb_inputs);
  celt_assert(BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE == model->bfcc_with_corr_upsampler_fc.nb_outputs);
  compute_generic_dense(&model->bfcc_with_corr_upsampler_fc, cond, features, ACTIVATION_TANH);
}
93
static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features);

/* Prime the synthesis state from a continuation signal.
   st:        state to (re)initialize for synthesis; its model must already be loaded.
   pcm0:      CONT_PCM_INPUTS samples of signal to continue from (indices 0..CONT_PCM_INPUTS-1
              are read).
   features0: feature vector used both for the weighted LPC and for the first synthesized frame.
   The function sets the de-emphasis, pre-emphasis and synthesis filter memories, runs the
   continuation network to fill st->cont and the rnn/fwc1..fwc7 states, marks the state as
   initialized, then synthesizes one frame and keeps everything but its first subframe in
   st->pcm_buf for the next fwgan_synthesize() call. */
void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
{
  int i;
  float norm2, norm_1;
  float wpcm0[CONT_PCM_INPUTS];
  float cont_inputs[CONT_PCM_INPUTS+1];
  float tmp1[MAX_CONT_SIZE];
  float tmp2[MAX_CONT_SIZE];
  float lpc[LPC_ORDER];
  float new_pcm[FWGAN_FRAME_SIZE];
  FWGAN *model;
  /* Restart the pitch phasor at 1+0i. Both components have to be reset: pitch_embeddings()
     relies on the phasor having unit norm, and a stale (or NaN) imaginary part left over from
     a previous run would otherwise survive the continuation. */
  st->embed_phase[0] = 1;
  st->embed_phase[1] = 0;
  model = &st->model;
  compute_wlpc(lpc, features0);
  /* Deemphasis memory is just the last continuation sample. */
  st->deemph_mem = pcm0[CONT_PCM_INPUTS-1];

  /* Apply analysis filter, considering that the preemphasis and deemphasis filter
     cancel each other in this case since the LPC filter is constant across that boundary.
  */
  for (i=LPC_ORDER;i<CONT_PCM_INPUTS;i++) {
    int j;
    wpcm0[i] = pcm0[i];
    for (j=0;j<LPC_ORDER;j++) wpcm0[i] += lpc[j]*pcm0[i-j-1];
  }
  /* FIXME: Make this less stupid. The first LPC_ORDER samples have no filter history, so they
     are simply filled with the first filtered value. */
  for (i=0;i<LPC_ORDER;i++) wpcm0[i] = wpcm0[LPC_ORDER];

  /* The memory of the pre-emphasis is the last sample of the weighted signal
     (ignoring preemphasis+deemphasis combination). */
  st->preemph_mem = wpcm0[CONT_PCM_INPUTS-1];
  /* The memory of the synthesis filter is the pre-emphasized continuation, most recent first. */
  for (i=0;i<LPC_ORDER;i++) st->syn_mem[i] = pcm0[CONT_PCM_INPUTS-1-i] - FWGAN_DEEMPHASIS*pcm0[CONT_PCM_INPUTS-2-i];

  /* Network input: log-energy in slot 0, followed by the energy-normalized weighted signal. */
  norm2 = celt_inner_prod(wpcm0, wpcm0, CONT_PCM_INPUTS, st->arch);
  norm_1 = 1.f/sqrt(1e-8f + norm2);
  for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*wpcm0[i];
  cont_inputs[0] = log(sqrt(norm2) + 1e-7f);

  /* Continuation network: six tanh dense layers, ping-ponging between tmp1 and tmp2. */
  compute_generic_dense(&model->cont_net_0, tmp1, cont_inputs, ACTIVATION_TANH);
  compute_generic_dense(&model->cont_net_2, tmp2, tmp1, ACTIVATION_TANH);
  compute_generic_dense(&model->cont_net_4, tmp1, tmp2, ACTIVATION_TANH);
  compute_generic_dense(&model->cont_net_6, tmp2, tmp1, ACTIVATION_TANH);
  compute_generic_dense(&model->cont_net_8, tmp1, tmp2, ACTIVATION_TANH);
  celt_assert(CONT_NET_10_OUT_SIZE == model->cont_net_10.nb_outputs);
  compute_generic_dense(&model->cont_net_10, st->cont, tmp1, ACTIVATION_TANH);

  /* Computing continuation for each layer: every recurrent/conv state is derived from st->cont. */
  celt_assert(RNN_GRU_STATE_SIZE == model->rnn_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->rnn_cont_fc_0, st->rnn_state, st->cont, ACTIVATION_TANH);

  celt_assert(FWC1_STATE_SIZE == model->fwc1_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc1_cont_fc_0, st->fwc1_state, st->cont, ACTIVATION_TANH);
  celt_assert(FWC2_STATE_SIZE == model->fwc2_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc2_cont_fc_0, st->fwc2_state, st->cont, ACTIVATION_TANH);
  celt_assert(FWC3_STATE_SIZE == model->fwc3_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc3_cont_fc_0, st->fwc3_state, st->cont, ACTIVATION_TANH);
  celt_assert(FWC4_STATE_SIZE == model->fwc4_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc4_cont_fc_0, st->fwc4_state, st->cont, ACTIVATION_TANH);
  celt_assert(FWC5_STATE_SIZE == model->fwc5_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc5_cont_fc_0, st->fwc5_state, st->cont, ACTIVATION_TANH);
  celt_assert(FWC6_STATE_SIZE == model->fwc6_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc6_cont_fc_0, st->fwc6_state, st->cont, ACTIVATION_TANH);
  celt_assert(FWC7_STATE_SIZE == model->fwc7_cont_fc_0.nb_outputs);
  compute_generic_dense(&model->fwc7_cont_fc_0, st->fwc7_state, st->cont, ACTIVATION_TANH);

  st->cont_initialized = 1;
  /* Process the first frame, discard the first subframe, and keep the rest for the first
     synthesis call. */
  fwgan_synthesize_impl(st, new_pcm, lpc, features0);
  OPUS_COPY(st->pcm_buf, &new_pcm[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
}
168
apply_gain(float * pcm,float c0,float * last_gain)169 static void apply_gain(float *pcm, float c0, float *last_gain) {
170 int i;
171 float gain = pow(10.f, (0.5f*c0/sqrt(18.f)));
172 for (i=0;i<SUBFRAME_SIZE;i++) pcm[i] *= *last_gain;
173 *last_gain = gain;
174 }
175
fwgan_lpc_syn(float * pcm,float * mem,const float * lpc,float last_lpc[LPC_ORDER])176 static void fwgan_lpc_syn(float *pcm, float *mem, const float *lpc, float last_lpc[LPC_ORDER]) {
177 int i;
178 for (i=0;i<SUBFRAME_SIZE;i++) {
179 int j;
180 for (j=0;j<LPC_ORDER;j++) pcm[i] -= mem[j]*last_lpc[j];
181 OPUS_MOVE(&mem[1], &mem[0], LPC_ORDER-1);
182 mem[0] = pcm[i];
183 }
184 OPUS_COPY(last_lpc, lpc, LPC_ORDER);
185 }
186
fwgan_preemphasis(float * pcm,float * preemph_mem)187 static void fwgan_preemphasis(float *pcm, float *preemph_mem) {
188 int i;
189 for (i=0;i<SUBFRAME_SIZE;i++) {
190 float tmp = pcm[i];
191 pcm[i] -= FWGAN_DEEMPHASIS * *preemph_mem;
192 *preemph_mem = tmp;
193 }
194 }
195
fwgan_deemphasis(float * pcm,float * deemph_mem)196 static void fwgan_deemphasis(float *pcm, float *deemph_mem) {
197 int i;
198 for (i=0;i<SUBFRAME_SIZE;i++) {
199 pcm[i] += FWGAN_DEEMPHASIS * *deemph_mem;
200 *deemph_mem = pcm[i];
201 }
202 }
203
/* Synthesize one subframe (SUBFRAME_SIZE samples) into pcm.
   cond: this subframe's slice of the upsampler output.
   w0:   per-sample phase increment handed to pitch_embeddings().
   lpc:  weighted LPC for the current frame (stored for the next subframe, see fwgan_lpc_syn()).
   c0:   feature value from which apply_gain() derives the gain.
   When st->cont_initialized is 1 (set by fwgan_cont()), only the feat_in layer is run, pcm is
   zeroed, and the flag is moved to 2; every later call runs the complete network followed by
   gain, pre-emphasis, LPC synthesis and de-emphasis. */
static void run_fwgan_subframe(FWGANState *st, float *pcm, const float *cond, double w0, const float *lpc, float c0)
{
  /* Two scratch buffers used alternately: odd_buf receives the fwc1/3/5/7 conv outputs,
     even_buf receives the rnn gate output and the fwc2/4/6 conv outputs. */
  float odd_buf[FWC1_FC_0_OUT_SIZE];
  float even_buf[IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE)];
  float feat_in[FEAT_IN_SIZE];
  float rnn_in[FEAT_IN_CONV1_CONV_OUT_SIZE];
  float pembed[FWGAN_FRAME_SIZE/2];
  FWGAN *model = &st->model;

  pitch_embeddings(pembed, st->embed_phase, w0);
  /* Assemble feat_in: the conditioning slice is written at offset
     BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4, then the pitch embedding at offset 0. */
  OPUS_COPY(&feat_in[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], &cond[0], BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4);
  OPUS_COPY(&feat_in[0], &pembed[0], FWGAN_FRAME_SIZE/2);

  /* feat_in layer: linear conv followed by a tanh gated activation, in place on rnn_in. */
  compute_generic_conv1d(&model->feat_in_conv1_conv, rnn_in, st->cont_conv1_mem, feat_in, FEAT_IN_CONV1_CONV_IN_SIZE, ACTIVATION_LINEAR);
  celt_assert(FEAT_IN_NL1_GATE_OUT_SIZE == model->feat_in_nl1_gate.nb_outputs);
  compute_gated_activation(&model->feat_in_nl1_gate, rnn_in, rnn_in, ACTIVATION_TANH);

  if (st->cont_initialized == 1) {
    /* Very first subframe after a continuation: the remaining layers already got their state
       from the continuation network, so output silence and just record the gain and LPC that
       the next subframe will use. */
    st->cont_initialized = 2;
    OPUS_CLEAR(pcm, SUBFRAME_SIZE);
    apply_gain(pcm, c0, &st->last_gain);
    OPUS_COPY(st->last_lpc, lpc, LPC_ORDER);
    return;
  }

  /* Recurrent layer, updating st->rnn_state in place, then its gated activation. */
  compute_generic_gru(&model->rnn_gru_input, &model->rnn_gru_recurrent, st->rnn_state, rnn_in);
  celt_assert(IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE) >= model->rnn_nl_gate.nb_outputs);
  compute_gated_activation(&model->rnn_nl_gate, even_buf, st->rnn_state, ACTIVATION_TANH);

  /* fwc1 .. fwc7: each stage is a linear conv with its own state followed by a tanh gate. */
  compute_generic_conv1d(&model->fwc1_fc_0, odd_buf, st->fwc1_state, even_buf, RNN_GRU_STATE_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc1_fc_1_gate, odd_buf, odd_buf, ACTIVATION_TANH);

  compute_generic_conv1d(&model->fwc2_fc_0, even_buf, st->fwc2_state, odd_buf, FWC1_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc2_fc_1_gate, even_buf, even_buf, ACTIVATION_TANH);

  compute_generic_conv1d(&model->fwc3_fc_0, odd_buf, st->fwc3_state, even_buf, FWC2_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc3_fc_1_gate, odd_buf, odd_buf, ACTIVATION_TANH);

  compute_generic_conv1d(&model->fwc4_fc_0, even_buf, st->fwc4_state, odd_buf, FWC3_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc4_fc_1_gate, even_buf, even_buf, ACTIVATION_TANH);

  compute_generic_conv1d(&model->fwc5_fc_0, odd_buf, st->fwc5_state, even_buf, FWC4_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc5_fc_1_gate, odd_buf, odd_buf, ACTIVATION_TANH);

  compute_generic_conv1d(&model->fwc6_fc_0, even_buf, st->fwc6_state, odd_buf, FWC5_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc6_fc_1_gate, even_buf, even_buf, ACTIVATION_TANH);

  /* The last gate writes straight into the output buffer. */
  compute_generic_conv1d(&model->fwc7_fc_0, odd_buf, st->fwc7_state, even_buf, FWC6_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
  compute_gated_activation(&model->fwc7_fc_1_gate, pcm, odd_buf, ACTIVATION_TANH);

  /* Post-processing chain, all in place on pcm. */
  apply_gain(pcm, c0, &st->last_gain);
  fwgan_preemphasis(pcm, &st->preemph_mem);
  fwgan_lpc_syn(pcm, st->syn_mem, lpc, st->last_lpc);
  fwgan_deemphasis(pcm, &st->deemph_mem);
}
263
/* Zero the whole state, then initialize its model from the fwgan_arrays weight table.
   A failure of init_fwgan() is only caught by celt_assert(); nothing is returned to the caller. */
void fwgan_init(FWGANState *st)
{
  int status;
  OPUS_CLEAR(st, 1);
  status = init_fwgan(&st->model, fwgan_arrays);
  celt_assert(status == 0);
  /* FIXME: perform arch detection. */
}
272
/* Initialize st->model from a serialized weight blob.
   data/len: the blob handed to parse_weights().
   Returns 0 on success and -1 if the blob cannot be parsed or init_fwgan() rejects the
   resulting weight list. The temporary weight list is released before returning. */
int fwgan_load_model(FWGANState *st, const unsigned char *data, int len) {
  WeightArray *list = NULL;
  int ret;
  /* Do not hand an invalid list to init_fwgan() when parsing fails.
     NOTE(review): assumes parse_weights() returns a negative value and leaves *list NULL
     (nothing for us to free) on failure -- confirm against its implementation. */
  ret = parse_weights(&list, data, len);
  if (ret < 0 || list == NULL) return -1;
  ret = init_fwgan(&st->model, list);
  opus_free(list);
  if (ret == 0) return 0;
  else return -1;
}
282
/* Synthesize NB_SUBFRAMES subframes of SUBFRAME_SIZE samples each into pcm, with no output
   buffering. The state must have been through fwgan_cont() first (asserted).
   lpc:      weighted LPC for this frame.
   features: full feature vector. Its first NB_FEATURES-2 entries are forwarded to the
             upsampler unchanged, entry NB_FEATURES-1 is forwarded with .5 added, entry
             NB_BANDS determines the pitch period and entry 0 drives the gain. */
static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features)
{
  int sf;
  int period;
  double w0;
  float cond[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE];
  float fwgan_features[NB_FEATURES-1];
  celt_assert(st->cont_initialized);

  /* Upsampler input: leading features as-is, plus the last feature offset by .5. */
  OPUS_COPY(fwgan_features, features, NB_FEATURES-2);
  fwgan_features[NB_FEATURES-2] = features[NB_FEATURES-1]+.5;

  /* Pitch period in samples and the matching phase increment per sample.
     NOTE(review): period is not range-checked here; a feature value that maps to a period
     <= 0 would yield an infinite or negative w0 -- confirm callers guarantee the range. */
  period = (int)floor(.1 + 50*features[NB_BANDS]+100);
  w0 = 2*M_PI/period;

  run_fwgan_upsampler(st, cond, fwgan_features);
  /* Each subframe consumes its own quarter of the conditioning vector. */
  for (sf=0;sf<NB_SUBFRAMES;sf++) {
    run_fwgan_subframe(st, pcm + sf*SUBFRAME_SIZE, cond + sf*BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4, w0, lpc, features[0]);
  }
}
303
/* Produce one frame of FWGAN_FRAME_SIZE float samples in pcm from one feature vector.
   The output is delayed through st->pcm_buf: it consists of the
   FWGAN_FRAME_SIZE-SUBFRAME_SIZE samples held back by the previous call followed by the first
   subframe synthesized now; the remainder of the new frame is held back for the next call. */
void fwgan_synthesize(FWGANState *st, float *pcm, const float *features)
{
  float frame[FWGAN_FRAME_SIZE];
  float lpc[LPC_ORDER];
  const int held = FWGAN_FRAME_SIZE-SUBFRAME_SIZE;
  compute_wlpc(lpc, features);
  fwgan_synthesize_impl(st, frame, lpc, features);
  /* Emit the held-back samples first, then the newly synthesized first subframe. */
  OPUS_COPY(pcm, st->pcm_buf, held);
  OPUS_COPY(pcm+held, frame, SUBFRAME_SIZE);
  /* Hold back the rest of the new frame. */
  OPUS_COPY(st->pcm_buf, frame+SUBFRAME_SIZE, held);
}
315
/* 16-bit integer variant of fwgan_synthesize().
   Synthesizes one frame in float, then converts each of the FWGAN_FRAME_SIZE samples by
   scaling with 32768, clamping to [-32767, 32767] and rounding to nearest via floor(x+.5).
   pcm must have room for FWGAN_FRAME_SIZE samples. */
void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features)
{
  int i;
  float fpcm[FWGAN_FRAME_SIZE];
  fwgan_synthesize(st, fpcm, features);
  /* Iterate over the size of the buffer that was actually filled (FWGAN_FRAME_SIZE) rather
     than LPCNET_FRAME_SIZE, so the loop can neither run past fpcm[] nor stop short of it
     should the two constants ever differ. */
  for (i=0;i<FWGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
}
323