xref: /aosp_15_r20/external/libopus/dnn/dump_data.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2017-2018 Mozilla */
2 /*
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions
5    are met:
6 
7    - Redistributions of source code must retain the above copyright
8    notice, this list of conditions and the following disclaimer.
9 
10    - Redistributions in binary form must reproduce the above copyright
11    notice, this list of conditions and the following disclaimer in the
12    documentation and/or other materials provided with the distribution.
13 
14    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include <stdlib.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <unistd.h>
35 #include "kiss_fft.h"
36 #include "common.h"
37 #include <math.h>
38 #include "freq.h"
39 #include "pitch.h"
40 #include "arch.h"
41 #include <assert.h>
42 #include "lpcnet.h"
43 #include "lpcnet_private.h"
44 #include "os_support.h"
45 #include "cpu_support.h"
46 
47 
/* Run N samples of x through a second-order recursive filter and store the
   result in y.

   The leading feed-forward coefficient is implicitly 1: each output is the
   input plus the first state element. b[0], b[1] weight the input and
   a[0], a[1] weight the output when the two state elements in mem are
   updated. mem carries the filter state across calls and is modified.

   y may alias x: every input sample is read before the matching output
   sample is written. */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n = 0;
  while (n < N) {
    const float in = x[n];
    const float out = in + mem[0];
    /* The state update is evaluated in double and rounded back to float on store. */
    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
    mem[1] = (b[1]*(double)in - a[1]*(double)out);
    y[n] = out;
    n++;
  }
}
59 
/* Draw one value from rand(), scale it to [0, 1] and shift it down by one
   half, giving a result in [-0.5, 0.5]. */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return unit - .5;
}
63 
/* Fill a[0..1] and then b[0..1] with independent draws of uni_rand() scaled
   by 0.75, i.e. values in [-0.375, 0.375]. The draw order is a[0], a[1],
   b[0], b[1]. */
static void rand_resp(float *a, float *b) {
  float *coef[2];
  int k;
  coef[0] = a;
  coef[1] = b;
  for (k=0;k<2;k++) {
    coef[k][0] = .75*uni_rand();
    coef[k][1] = .75*uni_rand();
  }
}
70 
compute_noise(int * noise,float noise_std)71 void compute_noise(int *noise, float noise_std) {
72   int i;
73   for (i=0;i<FRAME_SIZE;i++) {
74     noise[i] = (int)floor(.5 + noise_std*.707*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
75   }
76 }
77 
float2short(float x)78 static opus_int16 float2short(float x)
79 {
80   int i;
81   i = (int)floor(.5+x);
82   return IMAX(-32767, IMIN(32767, i));
83 }
84 
85 
/* Write one frame of interleaved (signal in, signal out) 16-bit pairs to
   file, updating the synthesis memory in st->sig_mem sample by sample.

   For every sample the function:
     - forms a linear prediction from st->sig_mem using the LPC_ORDER
       coefficients stored at st->features[NB_BANDS+2...];
     - emits the current head of st->sig_mem (rounded and saturated) as
       "signal in" and pcm[i] as "signal out";
     - converts the prediction residual with lin2ulaw(), adds noise[i],
       clamps to [0, 255], and pushes prediction + ulaw2lin(residual) onto
       the front of st->sig_mem.

   pcm and noise must each hold FRAME_SIZE entries. The fwrite() result is
   not checked. */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
  opus_int16 interleaved[2*FRAME_SIZE];
  opus_int16 *out = interleaved;
  int n;
  for (n=0;n<FRAME_SIZE;n++) {
    float pred = 0;
    float exc;
    int k;
    for (k=0;k<LPC_ORDER;k++) pred -= st->features[NB_BANDS+2+k]*st->sig_mem[k];
    /* Signal in: must be captured before sig_mem is shifted below. */
    *out++ = float2short(st->sig_mem[0]);
    /* Signal out. */
    *out++ = pcm[n];
    /* Simulate error on excitation. */
    exc = lin2ulaw(pcm[n] - pred);
    exc += noise[n];
    if (exc < 0) exc = 0;
    if (exc > 255) exc = 255;

    /* Shift the history by one sample and insert the new noisy sample. */
    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
    st->sig_mem[0] = pred + ulaw2lin(exc);
  }
  /* 4 bytes per input sample: two values per sample, assuming 2-byte opus_int16. */
  fwrite(interleaved, 4*FRAME_SIZE, 1, file);
}
108 
main(int argc,char ** argv)109 int main(int argc, char **argv) {
110   int i;
111   char *argv0;
112   int count=0;
113   static const float a_hp[2] = {-1.99599, 0.99600};
114   static const float b_hp[2] = {-2, 1};
115   float a_sig[2] = {0};
116   float b_sig[2] = {0};
117   float mem_hp_x[2]={0};
118   float mem_resp_x[2]={0};
119   float mem_preemph=0;
120   float x[FRAME_SIZE];
121   int gain_change_count=0;
122   FILE *f1;
123   FILE *ffeat;
124   FILE *fpcm=NULL;
125   opus_int16 pcm[FRAME_SIZE]={0};
126   int noisebuf[FRAME_SIZE]={0};
127   opus_int16 tmp[FRAME_SIZE] = {0};
128   float speech_gain=1;
129   float old_speech_gain = 1;
130   int one_pass_completed = 0;
131   LPCNetEncState *st;
132   float noise_std=0;
133   int training = -1;
134   int burg = 0;
135   int pitch = 0;
136   FILE *fnoise = NULL;
137   float noise_gain = 0;
138   long noise_size=0;
139   int arch;
140   srand(getpid());
141   arch = opus_select_arch();
142   st = lpcnet_encoder_create();
143   argv0=argv[0];
144   if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
145       burg = 1;
146       training = 1;
147   }
148   else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
149       burg = 1;
150       training = 0;
151   }
152   else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
153       pitch = 1;
154       training = 1;
155       fnoise = fopen(argv[2], "rb");
156       fseek(fnoise, 0, SEEK_END);
157       noise_size = ftell(fnoise);
158       fseek(fnoise, 0, SEEK_SET);
159       argv++;
160   }
161   else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
162       pitch = 1;
163       training = 0;
164   }
165   else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
166   else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
167   if (training == -1) {
168     fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
169     fprintf(stderr, "  or   %s -test <speech> <features out>\n", argv0);
170     return 1;
171   }
172   f1 = fopen(argv[2], "r");
173   if (f1 == NULL) {
174     fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
175     exit(1);
176   }
177   ffeat = fopen(argv[3], "wb");
178   if (ffeat == NULL) {
179     fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
180     exit(1);
181   }
182   if (training && !pitch) {
183     fpcm = fopen(argv[4], "wb");
184     if (fpcm == NULL) {
185       fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
186       exit(1);
187     }
188   }
189   while (1) {
190     size_t ret;
191     ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
192     if (feof(f1) || ret != FRAME_SIZE) {
193       if (!training) break;
194       rewind(f1);
195       ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
196       if (ret != FRAME_SIZE) {
197         fprintf(stderr, "error reading\n");
198         exit(1);
199       }
200       one_pass_completed = 1;
201     }
202     for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
203     if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
204     if (training && ++gain_change_count > 2821) {
205       float tmp1, tmp2;
206       speech_gain = pow(10., (-30+(rand()%40))/20.);
207       if (rand()&1) speech_gain = -speech_gain;
208       if (rand()%20==0) speech_gain *= .01;
209       if (!pitch && rand()%100==0) speech_gain = 0;
210       gain_change_count = 0;
211       rand_resp(a_sig, b_sig);
212       tmp1 = rand()/(float)RAND_MAX;
213       tmp2 = rand()/(float)RAND_MAX;
214       noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
215       if (fnoise != NULL) {
216         long pos;
217         /* Randomize the fraction because rand() only gives us 31 bits. */
218         float frac_pos = rand()/(float)RAND_MAX;
219         pos = (long)(frac_pos*noise_size);
220         /* 32-bit alignment. */
221         pos = pos/4 * 4;
222         if (pos > noise_size-500000) pos = noise_size-500000;
223         noise_gain = pow(10., (-15+(rand()%40))/20.);
224         if (rand()%10==0) noise_gain = 0;
225         fseek(fnoise, pos, SEEK_SET);
226       }
227     }
228     if (fnoise != NULL) {
229       opus_int16 noise[FRAME_SIZE];
230       ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
231       for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
232     }
233     biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
234     biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
235     for (i=0;i<FRAME_SIZE;i++) {
236       float g;
237       float f = (float)i/FRAME_SIZE;
238       g = f*speech_gain + (1-f)*old_speech_gain;
239       x[i] *= g;
240     }
241     if (burg) {
242       float ceps[2*NB_BANDS];
243       burg_cepstral_analysis(ceps, x);
244       fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
245     }
246     preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
247     for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
248     /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
249     for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
250     compute_frame_features(st, x, arch);
251 
252     if (fpcm) {
253         compute_noise(noisebuf, noise_std);
254     }
255 
256     if (pitch) {
257       signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
258       for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
259         pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
260       }
261       for (i=0;i<PITCH_IF_FEATURES;i++) {
262         pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
263       }
264       fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
265     } else {
266       fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
267     }
268     /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
269     if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
270     /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
271     for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
272     old_speech_gain = speech_gain;
273     count++;
274   }
275   fclose(f1);
276   fclose(ffeat);
277   if (fpcm) fclose(fpcm);
278   lpcnet_encoder_destroy(st);
279   return 0;
280 }
281