1 /* Copyright (C) 2003-2008 Jean-Marc Valin
2
3 File: mdf.c
4 Echo canceller based on the MDF algorithm (see below)
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 3. The name of the author may not be used to endorse or promote products
18 derived from this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 The echo canceller is based on the MDF algorithm described in:
35
36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38 February 1990.
39
40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41 double-talk is achieved using a variable learning rate as described in:
42
43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44 Cancellation With Double-Talk. IEEE Transactions on Audio,
45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47
48 There is no explicit double-talk detection, but a continuous variation
49 in the learning rate based on residual echo, double-talk and background
50 noise.
51
52 About the fixed-point version:
53 All the signals are represented with 16-bit words. The filter weights
54 are represented with 32-bit words, but only the top 16 bits are used
55 in most cases. The lower 16 bits are completely unreliable (due to the
56 fact that the update is done only on the top bits), but help in the
57 adaptation -- probably by removing a "threshold effect" due to
58 quantization (rounding going to zero) when the gradient is small.
59
60 Another kludge that seems to work good: when performing the weight
61 update, we only move half the way toward the "goal" this seems to
62 reduce the effect of quantization noise in the update phase. This
63 can be seen as applying a gradient descent on a "soft constraint"
64 instead of having a hard constraint.
65
66 */
67
68 #ifdef HAVE_CONFIG_H
69 #include "config.h"
70 #endif
71
72 #include "arch.h"
73 #include "speex/speex_echo.h"
74 #include "fftwrap.h"
75 #include "pseudofloat.h"
76 #include "math_approx.h"
77 #include "os_support.h"
78
79 #ifndef M_PI
80 #define M_PI 3.14159265358979323846
81 #endif
82
83 #ifdef FIXED_POINT
84 #define WEIGHT_SHIFT 11
85 #define NORMALIZE_SCALEDOWN 5
86 #define NORMALIZE_SCALEUP 3
87 #else
88 #define WEIGHT_SHIFT 0
89 #endif
90
91 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
92 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
93 #define TWO_PATH
94
95 #ifdef FIXED_POINT
96 static const spx_float_t MIN_LEAK = {20972, -22};
97
98 /* Constants for the two-path filter */
99 static const spx_float_t VAR1_SMOOTH = {23593, -16};
100 static const spx_float_t VAR2_SMOOTH = {23675, -15};
101 static const spx_float_t VAR1_UPDATE = {16384, -15};
102 static const spx_float_t VAR2_UPDATE = {16384, -16};
103 static const spx_float_t VAR_BACKTRACK = {16384, -12};
104 #define TOP16(x) ((x)>>16)
105
106 #else
107
108 static const spx_float_t MIN_LEAK = .005f;
109
110 /* Constants for the two-path filter */
111 static const spx_float_t VAR1_SMOOTH = .36f;
112 static const spx_float_t VAR2_SMOOTH = .7225f;
113 static const spx_float_t VAR1_UPDATE = .5f;
114 static const spx_float_t VAR2_UPDATE = .25f;
115 static const spx_float_t VAR_BACKTRACK = 4.f;
116 #define TOP16(x) (x)
117 #endif
118
119
120 #define PLAYBACK_DELAY 2
121
122 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
123
124
125 /** Speex echo cancellation state. */
126 struct SpeexEchoState_ {
127 int frame_size; /**< Number of samples processed each time */
128 int window_size;
129 int M;
130 int cancel_count;
131 int adapted;
132 int saturated;
133 int screwed_up;
134 int C; /** Number of input channels (microphones) */
135 int K; /** Number of output channels (loudspeakers) */
136 spx_int32_t sampling_rate;
137 spx_word16_t spec_average;
138 spx_word16_t beta0;
139 spx_word16_t beta_max;
140 spx_word32_t sum_adapt;
141 spx_word16_t leak_estimate;
142
143 spx_word16_t *e; /* scratch */
144 spx_word16_t *x; /* Far-end input buffer (2N) */
145 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */
146 spx_word16_t *input; /* scratch */
147 spx_word16_t *y; /* scratch */
148 spx_word16_t *last_y;
149 spx_word16_t *Y; /* scratch */
150 spx_word16_t *E;
151 spx_word32_t *PHI; /* scratch */
152 spx_word32_t *W; /* (Background) filter weights */
153 #ifdef TWO_PATH
154 spx_word16_t *foreground; /* Foreground filter weights */
155 spx_word32_t Davg1; /* 1st recursive average of the residual power difference */
156 spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */
157 spx_float_t Dvar1; /* Estimated variance of 1st estimator */
158 spx_float_t Dvar2; /* Estimated variance of 2nd estimator */
159 #endif
160 spx_word32_t *power; /* Power of the far-end signal */
161 spx_float_t *power_1;/* Inverse power of far-end */
162 spx_word16_t *wtmp; /* scratch */
163 #ifdef FIXED_POINT
164 spx_word16_t *wtmp2; /* scratch */
165 #endif
166 spx_word32_t *Rf; /* scratch */
167 spx_word32_t *Yf; /* scratch */
168 spx_word32_t *Xf; /* scratch */
169 spx_word32_t *Eh;
170 spx_word32_t *Yh;
171 spx_float_t Pey;
172 spx_float_t Pyy;
173 spx_word16_t *window;
174 spx_word16_t *prop;
175 void *fft_table;
176 spx_word16_t *memX, *memD, *memE;
177 spx_word16_t preemph;
178 spx_word16_t notch_radius;
179 spx_mem_t *notch_mem;
180
181 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
182 spx_int16_t *play_buf;
183 int play_buf_pos;
184 int play_buf_started;
185 };
186
filter_dc_notch16(const spx_int16_t * in,spx_word16_t radius,spx_word16_t * out,int len,spx_mem_t * mem,int stride)187 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
188 {
189 int i;
190 spx_word16_t den2;
191 #ifdef FIXED_POINT
192 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
193 #else
194 den2 = radius*radius + .7*(1-radius)*(1-radius);
195 #endif
196 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
197 for (i=0;i<len;i++)
198 {
199 spx_word16_t vin = in[i*stride];
200 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
201 #ifdef FIXED_POINT
202 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
203 #else
204 mem[0] = mem[1] + 2*(-vin + radius*vout);
205 #endif
206 mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
207 out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
208 }
209 }
210
211 /* This inner product is slightly different from the codec version because of fixed-point */
mdf_inner_prod(const spx_word16_t * x,const spx_word16_t * y,int len)212 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
213 {
214 spx_word32_t sum=0;
215 len >>= 1;
216 while(len--)
217 {
218 spx_word32_t part=0;
219 part = MAC16_16(part,*x++,*y++);
220 part = MAC16_16(part,*x++,*y++);
221 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
222 sum = ADD32(sum,SHR32(part,6));
223 }
224 return sum;
225 }
226
227 /** Compute power spectrum of a half-complex (packed) vector */
power_spectrum(const spx_word16_t * X,spx_word32_t * ps,int N)228 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
229 {
230 int i, j;
231 ps[0]=MULT16_16(X[0],X[0]);
232 for (i=1,j=1;i<N-1;i+=2,j++)
233 {
234 ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
235 }
236 ps[j]=MULT16_16(X[i],X[i]);
237 }
238
239 /** Compute power spectrum of a half-complex (packed) vector and accumulate */
power_spectrum_accum(const spx_word16_t * X,spx_word32_t * ps,int N)240 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
241 {
242 int i, j;
243 ps[0]+=MULT16_16(X[0],X[0]);
244 for (i=1,j=1;i<N-1;i+=2,j++)
245 {
246 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
247 }
248 ps[j]+=MULT16_16(X[i],X[i]);
249 }
250
251 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
252 #ifdef FIXED_POINT
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)253 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
254 {
255 int i,j;
256 spx_word32_t tmp1=0,tmp2=0;
257 for (j=0;j<M;j++)
258 {
259 tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
260 }
261 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
262 for (i=1;i<N-1;i+=2)
263 {
264 tmp1 = tmp2 = 0;
265 for (j=0;j<M;j++)
266 {
267 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
268 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
269 }
270 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
271 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
272 }
273 tmp1 = tmp2 = 0;
274 for (j=0;j<M;j++)
275 {
276 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
277 }
278 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
279 }
spectral_mul_accum16(const spx_word16_t * X,const spx_word16_t * Y,spx_word16_t * acc,int N,int M)280 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
281 {
282 int i,j;
283 spx_word32_t tmp1=0,tmp2=0;
284 for (j=0;j<M;j++)
285 {
286 tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
287 }
288 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
289 for (i=1;i<N-1;i+=2)
290 {
291 tmp1 = tmp2 = 0;
292 for (j=0;j<M;j++)
293 {
294 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
295 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
296 }
297 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
298 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
299 }
300 tmp1 = tmp2 = 0;
301 for (j=0;j<M;j++)
302 {
303 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
304 }
305 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
306 }
307
308 #else
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)309 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
310 {
311 int i,j;
312 for (i=0;i<N;i++)
313 acc[i] = 0;
314 for (j=0;j<M;j++)
315 {
316 acc[0] += X[0]*Y[0];
317 for (i=1;i<N-1;i+=2)
318 {
319 acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
320 acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
321 }
322 acc[i] += X[i]*Y[i];
323 X += N;
324 Y += N;
325 }
326 }
327 #define spectral_mul_accum16 spectral_mul_accum
328 #endif
329
330 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
weighted_spectral_mul_conj(const spx_float_t * w,const spx_float_t p,const spx_word16_t * X,const spx_word16_t * Y,spx_word32_t * prod,int N)331 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
332 {
333 int i, j;
334 spx_float_t W;
335 W = FLOAT_AMULT(p, w[0]);
336 prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
337 for (i=1,j=1;i<N-1;i+=2,j++)
338 {
339 W = FLOAT_AMULT(p, w[j]);
340 prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
341 prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
342 }
343 W = FLOAT_AMULT(p, w[j]);
344 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
345 }
346
mdf_adjust_prop(const spx_word32_t * W,int N,int M,int P,spx_word16_t * prop)347 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
348 {
349 int i, j, p;
350 spx_word16_t max_sum = 1;
351 spx_word32_t prop_sum = 1;
352 for (i=0;i<M;i++)
353 {
354 spx_word32_t tmp = 1;
355 for (p=0;p<P;p++)
356 for (j=0;j<N;j++)
357 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
358 #ifdef FIXED_POINT
359 /* Just a security in case an overflow were to occur */
360 tmp = MIN32(ABS32(tmp), 536870912);
361 #endif
362 prop[i] = spx_sqrt(tmp);
363 if (prop[i] > max_sum)
364 max_sum = prop[i];
365 }
366 for (i=0;i<M;i++)
367 {
368 prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
369 prop_sum += EXTEND32(prop[i]);
370 }
371 for (i=0;i<M;i++)
372 {
373 prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
374 /*printf ("%f ", prop[i]);*/
375 }
376 /*printf ("\n");*/
377 }
378
379 #ifdef DUMP_ECHO_CANCEL_DATA
380 #include <stdio.h>
381 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
382
dump_audio(const spx_int16_t * rec,const spx_int16_t * play,const spx_int16_t * out,int len)383 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
384 {
385 if (!(rFile && pFile && oFile))
386 {
387 speex_fatal("Dump files not open");
388 }
389 fwrite(rec, sizeof(spx_int16_t), len, rFile);
390 fwrite(play, sizeof(spx_int16_t), len, pFile);
391 fwrite(out, sizeof(spx_int16_t), len, oFile);
392 }
393 #endif
394
395 /** Creates a new echo canceller state */
speex_echo_state_init(int frame_size,int filter_length)396 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
397 {
398 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
399 }
400
speex_echo_state_init_mc(int frame_size,int filter_length,int nb_mic,int nb_speakers)401 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
402 {
403 int i,N,M, C, K;
404 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
405
406 st->K = nb_speakers;
407 st->C = nb_mic;
408 C=st->C;
409 K=st->K;
410 #ifdef DUMP_ECHO_CANCEL_DATA
411 if (rFile || pFile || oFile)
412 speex_fatal("Opening dump files twice");
413 rFile = fopen("aec_rec.sw", "wb");
414 pFile = fopen("aec_play.sw", "wb");
415 oFile = fopen("aec_out.sw", "wb");
416 #endif
417
418 st->frame_size = frame_size;
419 st->window_size = 2*frame_size;
420 N = st->window_size;
421 M = st->M = (filter_length+st->frame_size-1)/frame_size;
422 st->cancel_count=0;
423 st->sum_adapt = 0;
424 st->saturated = 0;
425 st->screwed_up = 0;
426 /* This is the default sampling rate */
427 st->sampling_rate = 8000;
428 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
429 #ifdef FIXED_POINT
430 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
431 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
432 #else
433 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
434 st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
435 #endif
436 st->leak_estimate = 0;
437
438 st->fft_table = spx_fft_init(N);
439
440 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
441 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
442 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
443 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
444 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
445 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
446 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
447 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
448 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
449 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
450
451 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
452 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
453 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
454 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
455 #ifdef TWO_PATH
456 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
457 #endif
458 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
459 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
460 st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
461 st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
462 st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
463 st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
464 #ifdef FIXED_POINT
465 st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
466 for (i=0;i<N>>1;i++)
467 {
468 st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
469 st->window[N-i-1] = st->window[i];
470 }
471 #else
472 for (i=0;i<N;i++)
473 st->window[i] = .5-.5*cos(2*M_PI*i/N);
474 #endif
475 for (i=0;i<=st->frame_size;i++)
476 st->power_1[i] = FLOAT_ONE;
477 for (i=0;i<N*M*K*C;i++)
478 st->W[i] = 0;
479 {
480 spx_word32_t sum = 0;
481 /* Ratio of ~10 between adaptation rate of first and last block */
482 spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
483 st->prop[0] = QCONST16(.7, 15);
484 sum = EXTEND32(st->prop[0]);
485 for (i=1;i<M;i++)
486 {
487 st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
488 sum = ADD32(sum, EXTEND32(st->prop[i]));
489 }
490 for (i=M-1;i>=0;i--)
491 {
492 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
493 }
494 }
495
496 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
497 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
498 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
499 st->preemph = QCONST16(.9,15);
500 if (st->sampling_rate<12000)
501 st->notch_radius = QCONST16(.9, 15);
502 else if (st->sampling_rate<24000)
503 st->notch_radius = QCONST16(.982, 15);
504 else
505 st->notch_radius = QCONST16(.992, 15);
506
507 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
508 st->adapted = 0;
509 st->Pey = st->Pyy = FLOAT_ONE;
510
511 #ifdef TWO_PATH
512 st->Davg1 = st->Davg2 = 0;
513 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
514 #endif
515
516 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
517 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
518 st->play_buf_started = 0;
519
520 return st;
521 }
522
523 /** Resets echo canceller state */
speex_echo_state_reset(SpeexEchoState * st)524 EXPORT void speex_echo_state_reset(SpeexEchoState *st)
525 {
526 int i, M, N, C, K;
527 st->cancel_count=0;
528 st->screwed_up = 0;
529 N = st->window_size;
530 M = st->M;
531 C=st->C;
532 K=st->K;
533 for (i=0;i<N*M;i++)
534 st->W[i] = 0;
535 #ifdef TWO_PATH
536 for (i=0;i<N*M;i++)
537 st->foreground[i] = 0;
538 #endif
539 for (i=0;i<N*(M+1);i++)
540 st->X[i] = 0;
541 for (i=0;i<=st->frame_size;i++)
542 {
543 st->power[i] = 0;
544 st->power_1[i] = FLOAT_ONE;
545 st->Eh[i] = 0;
546 st->Yh[i] = 0;
547 }
548 for (i=0;i<st->frame_size;i++)
549 {
550 st->last_y[i] = 0;
551 }
552 for (i=0;i<N*C;i++)
553 {
554 st->E[i] = 0;
555 }
556 for (i=0;i<N*K;i++)
557 {
558 st->x[i] = 0;
559 }
560 for (i=0;i<2*C;i++)
561 st->notch_mem[i] = 0;
562 for (i=0;i<C;i++)
563 st->memD[i]=st->memE[i]=0;
564 for (i=0;i<K;i++)
565 st->memX[i]=0;
566
567 st->saturated = 0;
568 st->adapted = 0;
569 st->sum_adapt = 0;
570 st->Pey = st->Pyy = FLOAT_ONE;
571 #ifdef TWO_PATH
572 st->Davg1 = st->Davg2 = 0;
573 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
574 #endif
575 for (i=0;i<3*st->frame_size;i++)
576 st->play_buf[i] = 0;
577 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
578 st->play_buf_started = 0;
579
580 }
581
582 /** Destroys an echo canceller state */
speex_echo_state_destroy(SpeexEchoState * st)583 EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
584 {
585 spx_fft_destroy(st->fft_table);
586
587 speex_free(st->e);
588 speex_free(st->x);
589 speex_free(st->input);
590 speex_free(st->y);
591 speex_free(st->last_y);
592 speex_free(st->Yf);
593 speex_free(st->Rf);
594 speex_free(st->Xf);
595 speex_free(st->Yh);
596 speex_free(st->Eh);
597
598 speex_free(st->X);
599 speex_free(st->Y);
600 speex_free(st->E);
601 speex_free(st->W);
602 #ifdef TWO_PATH
603 speex_free(st->foreground);
604 #endif
605 speex_free(st->PHI);
606 speex_free(st->power);
607 speex_free(st->power_1);
608 speex_free(st->window);
609 speex_free(st->prop);
610 speex_free(st->wtmp);
611 #ifdef FIXED_POINT
612 speex_free(st->wtmp2);
613 #endif
614 speex_free(st->memX);
615 speex_free(st->memD);
616 speex_free(st->memE);
617 speex_free(st->notch_mem);
618
619 speex_free(st->play_buf);
620 speex_free(st);
621
622 #ifdef DUMP_ECHO_CANCEL_DATA
623 fclose(rFile);
624 fclose(pFile);
625 fclose(oFile);
626 rFile = pFile = oFile = NULL;
627 #endif
628 }
629
speex_echo_capture(SpeexEchoState * st,const spx_int16_t * rec,spx_int16_t * out)630 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
631 {
632 int i;
633 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
634 st->play_buf_started = 1;
635 if (st->play_buf_pos>=st->frame_size)
636 {
637 speex_echo_cancellation(st, rec, st->play_buf, out);
638 st->play_buf_pos -= st->frame_size;
639 for (i=0;i<st->play_buf_pos;i++)
640 st->play_buf[i] = st->play_buf[i+st->frame_size];
641 } else {
642 speex_warning("No playback frame available (your application is buggy and/or got xruns)");
643 if (st->play_buf_pos!=0)
644 {
645 speex_warning("internal playback buffer corruption?");
646 st->play_buf_pos = 0;
647 }
648 for (i=0;i<st->frame_size;i++)
649 out[i] = rec[i];
650 }
651 }
652
speex_echo_playback(SpeexEchoState * st,const spx_int16_t * play)653 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
654 {
655 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
656 if (!st->play_buf_started)
657 {
658 speex_warning("discarded first playback frame");
659 return;
660 }
661 if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
662 {
663 int i;
664 for (i=0;i<st->frame_size;i++)
665 st->play_buf[st->play_buf_pos+i] = play[i];
666 st->play_buf_pos += st->frame_size;
667 if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
668 {
669 speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
670 for (i=0;i<st->frame_size;i++)
671 st->play_buf[st->play_buf_pos+i] = play[i];
672 st->play_buf_pos += st->frame_size;
673 }
674 } else {
675 speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
676 }
677 }
678
679 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
speex_echo_cancel(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out,spx_int32_t * Yout)680 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
681 {
682 speex_echo_cancellation(st, in, far_end, out);
683 }
684
685 /** Performs echo cancellation on a frame */
speex_echo_cancellation(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out)686 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
687 {
688 int i,j, chan, speak;
689 int N,M, C, K;
690 spx_word32_t Syy,See,Sxx,Sdd, Sff;
691 #ifdef TWO_PATH
692 spx_word32_t Dbf;
693 int update_foreground;
694 #endif
695 spx_word32_t Sey;
696 spx_word16_t ss, ss_1;
697 spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
698 spx_float_t alpha, alpha_1;
699 spx_word16_t RER;
700 spx_word32_t tmp32;
701
702 N = st->window_size;
703 M = st->M;
704 C = st->C;
705 K = st->K;
706
707 st->cancel_count++;
708 #ifdef FIXED_POINT
709 ss=DIV32_16(11469,M);
710 ss_1 = SUB16(32767,ss);
711 #else
712 ss=.35/M;
713 ss_1 = 1-ss;
714 #endif
715
716 for (chan = 0; chan < C; chan++)
717 {
718 /* Apply a notch filter to make sure DC doesn't end up causing problems */
719 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
720 /* Copy input data to buffer and apply pre-emphasis */
721 /* Copy input data to buffer */
722 for (i=0;i<st->frame_size;i++)
723 {
724 spx_word32_t tmp32;
725 /* FIXME: This core has changed a bit, need to merge properly */
726 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
727 #ifdef FIXED_POINT
728 if (tmp32 > 32767)
729 {
730 tmp32 = 32767;
731 if (st->saturated == 0)
732 st->saturated = 1;
733 }
734 if (tmp32 < -32767)
735 {
736 tmp32 = -32767;
737 if (st->saturated == 0)
738 st->saturated = 1;
739 }
740 #endif
741 st->memD[chan] = st->input[chan*st->frame_size+i];
742 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
743 }
744 }
745
746 for (speak = 0; speak < K; speak++)
747 {
748 for (i=0;i<st->frame_size;i++)
749 {
750 spx_word32_t tmp32;
751 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
752 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
753 #ifdef FIXED_POINT
754 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
755 if (tmp32 > 32767)
756 {
757 tmp32 = 32767;
758 st->saturated = M+1;
759 }
760 if (tmp32 < -32767)
761 {
762 tmp32 = -32767;
763 st->saturated = M+1;
764 }
765 #endif
766 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
767 st->memX[speak] = far_end[i*K+speak];
768 }
769 }
770
771 for (speak = 0; speak < K; speak++)
772 {
773 /* Shift memory: this could be optimized eventually*/
774 for (j=M-1;j>=0;j--)
775 {
776 for (i=0;i<N;i++)
777 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
778 }
779 /* Convert x (echo input) to frequency domain */
780 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
781 }
782
783 Sxx = 0;
784 for (speak = 0; speak < K; speak++)
785 {
786 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
787 power_spectrum_accum(st->X+speak*N, st->Xf, N);
788 }
789
790 Sff = 0;
791 for (chan = 0; chan < C; chan++)
792 {
793 #ifdef TWO_PATH
794 /* Compute foreground filter */
795 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
796 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
797 for (i=0;i<st->frame_size;i++)
798 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
799 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
800 #endif
801 }
802
803 /* Adjust proportional adaption rate */
804 /* FIXME: Adjust that for C, K*/
805 if (st->adapted)
806 mdf_adjust_prop (st->W, N, M, C*K, st->prop);
807 /* Compute weight gradient */
808 if (st->saturated == 0)
809 {
810 for (chan = 0; chan < C; chan++)
811 {
812 for (speak = 0; speak < K; speak++)
813 {
814 for (j=M-1;j>=0;j--)
815 {
816 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
817 for (i=0;i<N;i++)
818 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
819 }
820 }
821 }
822 } else {
823 st->saturated--;
824 }
825
826 /* FIXME: MC conversion required */
827 /* Update weight to prevent circular convolution (MDF / AUMDF) */
828 for (chan = 0; chan < C; chan++)
829 {
830 for (speak = 0; speak < K; speak++)
831 {
832 for (j=0;j<M;j++)
833 {
834 /* This is a variant of the Alternatively Updated MDF (AUMDF) */
835 /* Remove the "if" to make this an MDF filter */
836 if (j==0 || st->cancel_count%(M-1) == j-1)
837 {
838 #ifdef FIXED_POINT
839 for (i=0;i<N;i++)
840 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
841 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
842 for (i=0;i<st->frame_size;i++)
843 {
844 st->wtmp[i]=0;
845 }
846 for (i=st->frame_size;i<N;i++)
847 {
848 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
849 }
850 spx_fft(st->fft_table, st->wtmp, st->wtmp2);
851 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
852 for (i=0;i<N;i++)
853 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
854 #else
855 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
856 for (i=st->frame_size;i<N;i++)
857 {
858 st->wtmp[i]=0;
859 }
860 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
861 #endif
862 }
863 }
864 }
865 }
866
867 /* So we can use power_spectrum_accum */
868 for (i=0;i<=st->frame_size;i++)
869 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
870
871 Dbf = 0;
872 See = 0;
873 #ifdef TWO_PATH
874 /* Difference in response, this is used to estimate the variance of our residual power estimate */
875 for (chan = 0; chan < C; chan++)
876 {
877 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
878 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
879 for (i=0;i<st->frame_size;i++)
880 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
881 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
882 for (i=0;i<st->frame_size;i++)
883 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
884 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
885 }
886 #endif
887
888 #ifndef TWO_PATH
889 Sff = See;
890 #endif
891
892 #ifdef TWO_PATH
893 /* Logic for updating the foreground filter */
894
895 /* For two time windows, compute the mean of the energy difference, as well as the variance */
896 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
897 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
898 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
899 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
900
901 /* Equivalent float code:
902 st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
903 st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
904 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
905 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
906 */
907
908 update_foreground = 0;
909 /* Check if we have a statistically significant reduction in the residual echo */
910 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
911 if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
912 update_foreground = 1;
913 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
914 update_foreground = 1;
915 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
916 update_foreground = 1;
917
918 /* Do we update? */
919 if (update_foreground)
920 {
921 st->Davg1 = st->Davg2 = 0;
922 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
923 /* Copy background filter to foreground filter */
924 for (i=0;i<N*M*C*K;i++)
925 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
926 /* Apply a smooth transition so as to not introduce blocking artifacts */
927 for (chan = 0; chan < C; chan++)
928 for (i=0;i<st->frame_size;i++)
929 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
930 } else {
931 int reset_background=0;
932 /* Otherwise, check if the background filter is significantly worse */
933 if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
934 reset_background = 1;
935 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
936 reset_background = 1;
937 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
938 reset_background = 1;
939 if (reset_background)
940 {
941 /* Copy foreground filter to background filter */
942 for (i=0;i<N*M*C*K;i++)
943 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
944 /* We also need to copy the output so as to get correct adaptation */
945 for (chan = 0; chan < C; chan++)
946 {
947 for (i=0;i<st->frame_size;i++)
948 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
949 for (i=0;i<st->frame_size;i++)
950 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
951 }
952 See = Sff;
953 st->Davg1 = st->Davg2 = 0;
954 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
955 }
956 }
957 #endif
958
959 Sey = Syy = Sdd = 0;
960 for (chan = 0; chan < C; chan++)
961 {
962 /* Compute error signal (for the output with de-emphasis) */
963 for (i=0;i<st->frame_size;i++)
964 {
965 spx_word32_t tmp_out;
966 #ifdef TWO_PATH
967 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
968 #else
969 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
970 #endif
971 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
972 /* This is an arbitrary test for saturation in the microphone signal */
973 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
974 {
975 if (st->saturated == 0)
976 st->saturated = 1;
977 }
978 out[i*C+chan] = WORD2INT(tmp_out);
979 st->memE[chan] = tmp_out;
980 }
981
982 #ifdef DUMP_ECHO_CANCEL_DATA
983 dump_audio(in, far_end, out, st->frame_size);
984 #endif
985
986 /* Compute error signal (filter update version) */
987 for (i=0;i<st->frame_size;i++)
988 {
989 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
990 st->e[chan*N+i] = 0;
991 }
992
993 /* Compute a bunch of correlations */
994 /* FIXME: bad merge */
995 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
996 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
997 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
998
999 /* Convert error to frequency domain */
1000 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1001 for (i=0;i<st->frame_size;i++)
1002 st->y[i+chan*N] = 0;
1003 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1004
1005 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1006 power_spectrum_accum(st->E+chan*N, st->Rf, N);
1007 power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1008
1009 }
1010
1011 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1012
1013 /* Do some sanity check */
1014 if (!(Syy>=0 && Sxx>=0 && See >= 0)
1015 #ifndef FIXED_POINT
1016 || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1017 #endif
1018 )
1019 {
1020 /* Things have gone really bad */
1021 st->screwed_up += 50;
1022 for (i=0;i<st->frame_size*C;i++)
1023 out[i] = 0;
1024 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1025 {
1026 /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1027 st->screwed_up++;
1028 } else {
1029 /* Everything's fine */
1030 st->screwed_up=0;
1031 }
1032 if (st->screwed_up>=50)
1033 {
1034 speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1035 speex_echo_state_reset(st);
1036 return;
1037 }
1038
1039 /* Add a small noise floor to make sure not to have problems when dividing */
1040 See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1041
1042 for (speak = 0; speak < K; speak++)
1043 {
1044 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1045 power_spectrum_accum(st->X+speak*N, st->Xf, N);
1046 }
1047
1048
1049 /* Smooth far end energy estimate over time */
1050 for (j=0;j<=st->frame_size;j++)
1051 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1052
1053 /* Compute filtered spectra and (cross-)correlations */
1054 for (j=st->frame_size;j>=0;j--)
1055 {
1056 spx_float_t Eh, Yh;
1057 Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1058 Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1059 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1060 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1061 #ifdef FIXED_POINT
1062 st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1063 st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1064 #else
1065 st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1066 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1067 #endif
1068 }
1069
1070 Pyy = FLOAT_SQRT(Pyy);
1071 Pey = FLOAT_DIVU(Pey,Pyy);
1072
1073 /* Compute correlation updatete rate */
1074 tmp32 = MULT16_32_Q15(st->beta0,Syy);
1075 if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1076 tmp32 = MULT16_32_Q15(st->beta_max,See);
1077 alpha = FLOAT_DIV32(tmp32, See);
1078 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1079 /* Update correlations (recursive average) */
1080 st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1081 st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1082 if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1083 st->Pyy = FLOAT_ONE;
1084 /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1085 if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1086 st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1087 if (FLOAT_GT(st->Pey, st->Pyy))
1088 st->Pey = st->Pyy;
1089 /* leak_estimate is the linear regression result */
1090 st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1091 /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1092 if (st->leak_estimate > 16383)
1093 st->leak_estimate = 32767;
1094 else
1095 st->leak_estimate = SHL16(st->leak_estimate,1);
1096 /*printf ("%f\n", st->leak_estimate);*/
1097
1098 /* Compute Residual to Error Ratio */
1099 #ifdef FIXED_POINT
1100 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1101 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1102 /* Check for y in e (lower bound on RER) */
1103 {
1104 spx_float_t bound = PSEUDOFLOAT(Sey);
1105 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1106 if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1107 tmp32 = See;
1108 else if (tmp32 < FLOAT_EXTRACT32(bound))
1109 tmp32 = FLOAT_EXTRACT32(bound);
1110 }
1111 if (tmp32 > SHR32(See,1))
1112 tmp32 = SHR32(See,1);
1113 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1114 #else
1115 RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1116 /* Check for y in e (lower bound on RER) */
1117 if (RER < Sey*Sey/(1+See*Syy))
1118 RER = Sey*Sey/(1+See*Syy);
1119 if (RER > .5)
1120 RER = .5;
1121 #endif
1122
1123 /* We consider that the filter has had minimal adaptation if the following is true*/
1124 if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1125 {
1126 st->adapted = 1;
1127 }
1128
1129 if (st->adapted)
1130 {
1131 /* Normal learning rate calculation once we're past the minimal adaptation phase */
1132 for (i=0;i<=st->frame_size;i++)
1133 {
1134 spx_word32_t r, e;
1135 /* Compute frequency-domain adaptation mask */
1136 r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1137 e = SHL32(st->Rf[i],3)+1;
1138 #ifdef FIXED_POINT
1139 if (r>SHR32(e,1))
1140 r = SHR32(e,1);
1141 #else
1142 if (r>.5*e)
1143 r = .5*e;
1144 #endif
1145 r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1146 /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1147 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1148 }
1149 } else {
1150 /* Temporary adaption rate if filter is not yet adapted enough */
1151 spx_word16_t adapt_rate=0;
1152
1153 if (Sxx > SHR32(MULT16_16(N, 1000),6))
1154 {
1155 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1156 #ifdef FIXED_POINT
1157 if (tmp32 > SHR32(See,2))
1158 tmp32 = SHR32(See,2);
1159 #else
1160 if (tmp32 > .25*See)
1161 tmp32 = .25*See;
1162 #endif
1163 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1164 }
1165 for (i=0;i<=st->frame_size;i++)
1166 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1167
1168
1169 /* How much have we adapted so far? */
1170 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1171 }
1172
1173 /* FIXME: MC conversion required */
1174 for (i=0;i<st->frame_size;i++)
1175 st->last_y[i] = st->last_y[st->frame_size+i];
1176 if (st->adapted)
1177 {
1178 /* If the filter is adapted, take the filtered echo */
1179 for (i=0;i<st->frame_size;i++)
1180 st->last_y[st->frame_size+i] = in[i]-out[i];
1181 } else {
1182 /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1183 /* moved earlier: for (i=0;i<N;i++)
1184 st->last_y[i] = st->x[i];*/
1185 }
1186
1187 }
1188
1189 /* Compute spectrum of estimated echo for use in an echo post-filter */
speex_echo_get_residual(SpeexEchoState * st,spx_word32_t * residual_echo,int len)1190 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1191 {
1192 int i;
1193 spx_word16_t leak2;
1194 int N;
1195
1196 N = st->window_size;
1197
1198 /* Apply hanning window (should pre-compute it)*/
1199 for (i=0;i<N;i++)
1200 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1201
1202 /* Compute power spectrum of the echo */
1203 spx_fft(st->fft_table, st->y, st->Y);
1204 power_spectrum(st->Y, residual_echo, N);
1205
1206 #ifdef FIXED_POINT
1207 if (st->leak_estimate > 16383)
1208 leak2 = 32767;
1209 else
1210 leak2 = SHL16(st->leak_estimate, 1);
1211 #else
1212 if (st->leak_estimate>.5)
1213 leak2 = 1;
1214 else
1215 leak2 = 2*st->leak_estimate;
1216 #endif
1217 /* Estimate residual echo */
1218 for (i=0;i<=st->frame_size;i++)
1219 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1220
1221 }
1222
speex_echo_ctl(SpeexEchoState * st,int request,void * ptr)1223 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1224 {
1225 switch(request)
1226 {
1227
1228 case SPEEX_ECHO_GET_FRAME_SIZE:
1229 (*(int*)ptr) = st->frame_size;
1230 break;
1231 case SPEEX_ECHO_SET_SAMPLING_RATE:
1232 st->sampling_rate = (*(int*)ptr);
1233 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1234 #ifdef FIXED_POINT
1235 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1236 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1237 #else
1238 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1239 st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1240 #endif
1241 if (st->sampling_rate<12000)
1242 st->notch_radius = QCONST16(.9, 15);
1243 else if (st->sampling_rate<24000)
1244 st->notch_radius = QCONST16(.982, 15);
1245 else
1246 st->notch_radius = QCONST16(.992, 15);
1247 break;
1248 case SPEEX_ECHO_GET_SAMPLING_RATE:
1249 (*(int*)ptr) = st->sampling_rate;
1250 break;
1251 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1252 /*FIXME: Implement this for multiple channels */
1253 *((spx_int32_t *)ptr) = st->M * st->frame_size;
1254 break;
1255 case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1256 {
1257 int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1258 spx_int32_t *filt = (spx_int32_t *) ptr;
1259 for(j=0;j<M;j++)
1260 {
1261 /*FIXME: Implement this for multiple channels */
1262 #ifdef FIXED_POINT
1263 for (i=0;i<N;i++)
1264 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1265 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1266 #else
1267 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1268 #endif
1269 for(i=0;i<n;i++)
1270 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1271 }
1272 }
1273 break;
1274 default:
1275 speex_warning_int("Unknown speex_echo_ctl request: ", request);
1276 return -1;
1277 }
1278 return 0;
1279 }
1280