1 /* Copyright (c) 2017-2018 Mozilla */
2 /*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
18 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <unistd.h>
35 #include "kiss_fft.h"
36 #include "common.h"
37 #include <math.h>
38 #include "freq.h"
39 #include "pitch.h"
40 #include "arch.h"
41 #include <assert.h>
42 #include "lpcnet.h"
43 #include "lpcnet_private.h"
44 #include "os_support.h"
45 #include "cpu_support.h"
46
47
/* Second-order IIR filter with a two-element state.
 *
 * Implements the difference equation
 *   y[n] = x[n] + b[0]*x[n-1] + b[1]*x[n-2] - a[0]*y[n-1] - a[1]*y[n-2]
 * with the history carried in mem[0..1], which is updated so that consecutive
 * calls continue the same filter. The state updates are evaluated in double
 * precision and stored back as float.
 *
 * y   output samples (N values); may be the same buffer as x
 * mem filter state, read and updated
 * x   input samples (N values)
 * b   two feed-forward coefficients
 * a   two feedback coefficients
 * N   number of samples to process
 */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n;
  for (n = 0; n < N; n++) {
    /* Read the input before writing y[n] so in-place filtering works. */
    const float in = x[n];
    const float out = in + mem[0];
    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
    mem[1] = (b[1]*(double)in - a[1]*(double)out);
    y[n] = out;
  }
}
59
/* Return a pseudo-random value in [-0.5, 0.5] derived from one rand() call. */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return (float)(unit - .5);
}
63
/* Draw random two-tap filter coefficients.
 *
 * Each of a[0], a[1], b[0], b[1] (filled in that order) is set to .75 times a
 * fresh uni_rand() value.
 */
static void rand_resp(float *a, float *b) {
  int k;
  for (k = 0; k < 2; k++) a[k] = .75*uni_rand();
  for (k = 0; k < 2; k++) b[k] = .75*uni_rand();
}
70
compute_noise(int * noise,float noise_std)71 void compute_noise(int *noise, float noise_std) {
72 int i;
73 for (i=0;i<FRAME_SIZE;i++) {
74 noise[i] = (int)floor(.5 + noise_std*.707*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
75 }
76 }
77
float2short(float x)78 static opus_int16 float2short(float x)
79 {
80 int i;
81 i = (int)floor(.5+x);
82 return IMAX(-32767, IMIN(32767, i));
83 }
84
85
/* Write one frame of interleaved (input, target) 16-bit samples to file.
 *
 * For every sample the prediction is computed from st->sig_mem using the
 * LPC_ORDER coefficients stored at st->features[NB_BANDS+2...], the prediction
 * residual is converted with lin2ulaw(), perturbed by noise[i], clamped to
 * [0, 255] and converted back with ulaw2lin() to update st->sig_mem. The value
 * written as "input" is the previous (noisy) st->sig_mem[0]; the value written
 * as "target" is the clean pcm[i].
 *
 * st    encoder state; sig_mem is updated, features is read
 * pcm   FRAME_SIZE clean samples
 * noise FRAME_SIZE offsets added to the converted residual
 * file  output stream; 2*FRAME_SIZE opus_int16 values are written per call
 */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
  opus_int16 interleaved[2*FRAME_SIZE];
  int n;
  for (n = 0; n < FRAME_SIZE; n++) {
    float pred = 0;
    float exc;
    int k;
    /* Prediction from the past noisy signal. */
    for (k = 0; k < LPC_ORDER; k++) {
      pred -= st->features[NB_BANDS+2+k]*st->sig_mem[k];
    }
    exc = lin2ulaw(pcm[n] - pred);
    /* Even slots: signal in. Odd slots: signal out. */
    interleaved[2*n] = float2short(st->sig_mem[0]);
    interleaved[2*n+1] = pcm[n];
    /* Simulate error on excitation. */
    exc += noise[n];
    exc = IMIN(255, IMAX(0, exc));

    /* Shift the signal history by one sample and insert the newest value. */
    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
    st->sig_mem[0] = pred + ulaw2lin(exc);
  }
  /* sizeof(interleaved) is the 4*FRAME_SIZE bytes of the whole frame. */
  fwrite(interleaved, sizeof(interleaved), 1, file);
}
108
main(int argc,char ** argv)109 int main(int argc, char **argv) {
110 int i;
111 char *argv0;
112 int count=0;
113 static const float a_hp[2] = {-1.99599, 0.99600};
114 static const float b_hp[2] = {-2, 1};
115 float a_sig[2] = {0};
116 float b_sig[2] = {0};
117 float mem_hp_x[2]={0};
118 float mem_resp_x[2]={0};
119 float mem_preemph=0;
120 float x[FRAME_SIZE];
121 int gain_change_count=0;
122 FILE *f1;
123 FILE *ffeat;
124 FILE *fpcm=NULL;
125 opus_int16 pcm[FRAME_SIZE]={0};
126 int noisebuf[FRAME_SIZE]={0};
127 opus_int16 tmp[FRAME_SIZE] = {0};
128 float speech_gain=1;
129 float old_speech_gain = 1;
130 int one_pass_completed = 0;
131 LPCNetEncState *st;
132 float noise_std=0;
133 int training = -1;
134 int burg = 0;
135 int pitch = 0;
136 FILE *fnoise = NULL;
137 float noise_gain = 0;
138 long noise_size=0;
139 int arch;
140 srand(getpid());
141 arch = opus_select_arch();
142 st = lpcnet_encoder_create();
143 argv0=argv[0];
144 if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
145 burg = 1;
146 training = 1;
147 }
148 else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
149 burg = 1;
150 training = 0;
151 }
152 else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
153 pitch = 1;
154 training = 1;
155 fnoise = fopen(argv[2], "rb");
156 fseek(fnoise, 0, SEEK_END);
157 noise_size = ftell(fnoise);
158 fseek(fnoise, 0, SEEK_SET);
159 argv++;
160 }
161 else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
162 pitch = 1;
163 training = 0;
164 }
165 else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
166 else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
167 if (training == -1) {
168 fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
169 fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
170 return 1;
171 }
172 f1 = fopen(argv[2], "r");
173 if (f1 == NULL) {
174 fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
175 exit(1);
176 }
177 ffeat = fopen(argv[3], "wb");
178 if (ffeat == NULL) {
179 fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
180 exit(1);
181 }
182 if (training && !pitch) {
183 fpcm = fopen(argv[4], "wb");
184 if (fpcm == NULL) {
185 fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
186 exit(1);
187 }
188 }
189 while (1) {
190 size_t ret;
191 ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
192 if (feof(f1) || ret != FRAME_SIZE) {
193 if (!training) break;
194 rewind(f1);
195 ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
196 if (ret != FRAME_SIZE) {
197 fprintf(stderr, "error reading\n");
198 exit(1);
199 }
200 one_pass_completed = 1;
201 }
202 for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
203 if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
204 if (training && ++gain_change_count > 2821) {
205 float tmp1, tmp2;
206 speech_gain = pow(10., (-30+(rand()%40))/20.);
207 if (rand()&1) speech_gain = -speech_gain;
208 if (rand()%20==0) speech_gain *= .01;
209 if (!pitch && rand()%100==0) speech_gain = 0;
210 gain_change_count = 0;
211 rand_resp(a_sig, b_sig);
212 tmp1 = rand()/(float)RAND_MAX;
213 tmp2 = rand()/(float)RAND_MAX;
214 noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
215 if (fnoise != NULL) {
216 long pos;
217 /* Randomize the fraction because rand() only gives us 31 bits. */
218 float frac_pos = rand()/(float)RAND_MAX;
219 pos = (long)(frac_pos*noise_size);
220 /* 32-bit alignment. */
221 pos = pos/4 * 4;
222 if (pos > noise_size-500000) pos = noise_size-500000;
223 noise_gain = pow(10., (-15+(rand()%40))/20.);
224 if (rand()%10==0) noise_gain = 0;
225 fseek(fnoise, pos, SEEK_SET);
226 }
227 }
228 if (fnoise != NULL) {
229 opus_int16 noise[FRAME_SIZE];
230 ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
231 for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
232 }
233 biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
234 biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
235 for (i=0;i<FRAME_SIZE;i++) {
236 float g;
237 float f = (float)i/FRAME_SIZE;
238 g = f*speech_gain + (1-f)*old_speech_gain;
239 x[i] *= g;
240 }
241 if (burg) {
242 float ceps[2*NB_BANDS];
243 burg_cepstral_analysis(ceps, x);
244 fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
245 }
246 preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
247 for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
248 /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
249 for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
250 compute_frame_features(st, x, arch);
251
252 if (fpcm) {
253 compute_noise(noisebuf, noise_std);
254 }
255
256 if (pitch) {
257 signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
258 for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
259 pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
260 }
261 for (i=0;i<PITCH_IF_FEATURES;i++) {
262 pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
263 }
264 fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
265 } else {
266 fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
267 }
268 /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
269 if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
270 /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
271 for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
272 old_speech_gain = speech_gain;
273 count++;
274 }
275 fclose(f1);
276 fclose(ffeat);
277 if (fpcm) fclose(fpcm);
278 lpcnet_encoder_destroy(st);
279 return 0;
280 }
281