xref: /aosp_15_r20/external/speex/libspeexdsp/mdf.c (revision 28e138c64d234588b5cd2a8a403b584bd3036e4e)
1 /* Copyright (C) 2003-2008 Jean-Marc Valin
2 
3    File: mdf.c
4    Echo canceller based on the MDF algorithm (see below)
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are
8    met:
9 
10    1. Redistributions of source code must retain the above copyright notice,
11    this list of conditions and the following disclaimer.
12 
13    2. Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    3. The name of the author may not be used to endorse or promote products
18    derived from this software without specific prior written permission.
19 
20    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30    POSSIBILITY OF SUCH DAMAGE.
31 */
32 
33 /*
34    The echo canceller is based on the MDF algorithm described in:
35 
36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38    February 1990.
39 
40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41    double-talk is achieved using a variable learning rate as described in:
42 
43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44    Cancellation With Double-Talk. IEEE Transactions on Audio,
45    Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46    http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47 
48    There is no explicit double-talk detection, but a continuous variation
49    in the learning rate based on residual echo, double-talk and background
50    noise.
51 
52    About the fixed-point version:
53    All the signals are represented with 16-bit words. The filter weights
54    are represented with 32-bit words, but only the top 16 bits are used
55    in most cases. The lower 16 bits are completely unreliable (due to the
56    fact that the update is done only on the top bits), but help in the
57    adaptation -- probably by removing a "threshold effect" due to
58    quantization (rounding going to zero) when the gradient is small.
59 
60    Another kludge that seems to work good: when performing the weight
61    update, we only move half the way toward the "goal" this seems to
62    reduce the effect of quantization noise in the update phase. This
63    can be seen as applying a gradient descent on a "soft constraint"
64    instead of having a hard constraint.
65 
66 */
67 
68 #ifdef HAVE_CONFIG_H
69 #include "config.h"
70 #endif
71 
72 #include "arch.h"
73 #include "speex/speex_echo.h"
74 #include "fftwrap.h"
75 #include "pseudofloat.h"
76 #include "math_approx.h"
77 #include "os_support.h"
78 
79 #ifndef M_PI
80 #define M_PI 3.14159265358979323846
81 #endif
82 
83 #ifdef FIXED_POINT
84 #define WEIGHT_SHIFT 11
85 #define NORMALIZE_SCALEDOWN 5
86 #define NORMALIZE_SCALEUP 3
87 #else
88 #define WEIGHT_SHIFT 0
89 #endif
90 
91 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
92    and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
93 #define TWO_PATH
94 
95 #ifdef FIXED_POINT
96 static const spx_float_t MIN_LEAK = {20972, -22};
97 
98 /* Constants for the two-path filter */
99 static const spx_float_t VAR1_SMOOTH = {23593, -16};
100 static const spx_float_t VAR2_SMOOTH = {23675, -15};
101 static const spx_float_t VAR1_UPDATE = {16384, -15};
102 static const spx_float_t VAR2_UPDATE = {16384, -16};
103 static const spx_float_t VAR_BACKTRACK = {16384, -12};
104 #define TOP16(x) ((x)>>16)
105 
106 #else
107 
108 static const spx_float_t MIN_LEAK = .005f;
109 
110 /* Constants for the two-path filter */
111 static const spx_float_t VAR1_SMOOTH = .36f;
112 static const spx_float_t VAR2_SMOOTH = .7225f;
113 static const spx_float_t VAR1_UPDATE = .5f;
114 static const spx_float_t VAR2_UPDATE = .25f;
115 static const spx_float_t VAR_BACKTRACK = 4.f;
116 #define TOP16(x) (x)
117 #endif
118 
119 
120 #define PLAYBACK_DELAY 2
121 
122 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
123 
124 
125 /** Speex echo cancellation state. */
126 struct SpeexEchoState_ {
127    int frame_size;           /**< Number of samples processed each time */
128    int window_size;
129    int M;
130    int cancel_count;
131    int adapted;
132    int saturated;
133    int screwed_up;
134    int C;                    /** Number of input channels (microphones) */
135    int K;                    /** Number of output channels (loudspeakers) */
136    spx_int32_t sampling_rate;
137    spx_word16_t spec_average;
138    spx_word16_t beta0;
139    spx_word16_t beta_max;
140    spx_word32_t sum_adapt;
141    spx_word16_t leak_estimate;
142 
143    spx_word16_t *e;      /* scratch */
144    spx_word16_t *x;      /* Far-end input buffer (2N) */
145    spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
146    spx_word16_t *input;  /* scratch */
147    spx_word16_t *y;      /* scratch */
148    spx_word16_t *last_y;
149    spx_word16_t *Y;      /* scratch */
150    spx_word16_t *E;
151    spx_word32_t *PHI;    /* scratch */
152    spx_word32_t *W;      /* (Background) filter weights */
153 #ifdef TWO_PATH
154    spx_word16_t *foreground; /* Foreground filter weights */
155    spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
156    spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
157    spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
158    spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
159 #endif
160    spx_word32_t *power;  /* Power of the far-end signal */
161    spx_float_t  *power_1;/* Inverse power of far-end */
162    spx_word16_t *wtmp;   /* scratch */
163 #ifdef FIXED_POINT
164    spx_word16_t *wtmp2;  /* scratch */
165 #endif
166    spx_word32_t *Rf;     /* scratch */
167    spx_word32_t *Yf;     /* scratch */
168    spx_word32_t *Xf;     /* scratch */
169    spx_word32_t *Eh;
170    spx_word32_t *Yh;
171    spx_float_t   Pey;
172    spx_float_t   Pyy;
173    spx_word16_t *window;
174    spx_word16_t *prop;
175    void *fft_table;
176    spx_word16_t *memX, *memD, *memE;
177    spx_word16_t preemph;
178    spx_word16_t notch_radius;
179    spx_mem_t *notch_mem;
180 
181    /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
182    spx_int16_t *play_buf;
183    int play_buf_pos;
184    int play_buf_started;
185 };
186 
filter_dc_notch16(const spx_int16_t * in,spx_word16_t radius,spx_word16_t * out,int len,spx_mem_t * mem,int stride)187 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
188 {
189    int i;
190    spx_word16_t den2;
191 #ifdef FIXED_POINT
192    den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
193 #else
194    den2 = radius*radius + .7*(1-radius)*(1-radius);
195 #endif
196    /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
197    for (i=0;i<len;i++)
198    {
199       spx_word16_t vin = in[i*stride];
200       spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
201 #ifdef FIXED_POINT
202       mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
203 #else
204       mem[0] = mem[1] + 2*(-vin + radius*vout);
205 #endif
206       mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
207       out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
208    }
209 }
210 
211 /* This inner product is slightly different from the codec version because of fixed-point */
mdf_inner_prod(const spx_word16_t * x,const spx_word16_t * y,int len)212 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
213 {
214    spx_word32_t sum=0;
215    len >>= 1;
216    while(len--)
217    {
218       spx_word32_t part=0;
219       part = MAC16_16(part,*x++,*y++);
220       part = MAC16_16(part,*x++,*y++);
221       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
222       sum = ADD32(sum,SHR32(part,6));
223    }
224    return sum;
225 }
226 
227 /** Compute power spectrum of a half-complex (packed) vector */
power_spectrum(const spx_word16_t * X,spx_word32_t * ps,int N)228 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
229 {
230    int i, j;
231    ps[0]=MULT16_16(X[0],X[0]);
232    for (i=1,j=1;i<N-1;i+=2,j++)
233    {
234       ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
235    }
236    ps[j]=MULT16_16(X[i],X[i]);
237 }
238 
239 /** Compute power spectrum of a half-complex (packed) vector and accumulate */
power_spectrum_accum(const spx_word16_t * X,spx_word32_t * ps,int N)240 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
241 {
242    int i, j;
243    ps[0]+=MULT16_16(X[0],X[0]);
244    for (i=1,j=1;i<N-1;i+=2,j++)
245    {
246       ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
247    }
248    ps[j]+=MULT16_16(X[i],X[i]);
249 }
250 
251 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
252 #ifdef FIXED_POINT
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)253 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
254 {
255    int i,j;
256    spx_word32_t tmp1=0,tmp2=0;
257    for (j=0;j<M;j++)
258    {
259       tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
260    }
261    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
262    for (i=1;i<N-1;i+=2)
263    {
264       tmp1 = tmp2 = 0;
265       for (j=0;j<M;j++)
266       {
267          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
268          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
269       }
270       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
271       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
272    }
273    tmp1 = tmp2 = 0;
274    for (j=0;j<M;j++)
275    {
276       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
277    }
278    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
279 }
spectral_mul_accum16(const spx_word16_t * X,const spx_word16_t * Y,spx_word16_t * acc,int N,int M)280 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
281 {
282    int i,j;
283    spx_word32_t tmp1=0,tmp2=0;
284    for (j=0;j<M;j++)
285    {
286       tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
287    }
288    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
289    for (i=1;i<N-1;i+=2)
290    {
291       tmp1 = tmp2 = 0;
292       for (j=0;j<M;j++)
293       {
294          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
295          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
296       }
297       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
298       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
299    }
300    tmp1 = tmp2 = 0;
301    for (j=0;j<M;j++)
302    {
303       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
304    }
305    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
306 }
307 
308 #else
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)309 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
310 {
311    int i,j;
312    for (i=0;i<N;i++)
313       acc[i] = 0;
314    for (j=0;j<M;j++)
315    {
316       acc[0] += X[0]*Y[0];
317       for (i=1;i<N-1;i+=2)
318       {
319          acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
320          acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
321       }
322       acc[i] += X[i]*Y[i];
323       X += N;
324       Y += N;
325    }
326 }
327 #define spectral_mul_accum16 spectral_mul_accum
328 #endif
329 
330 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
weighted_spectral_mul_conj(const spx_float_t * w,const spx_float_t p,const spx_word16_t * X,const spx_word16_t * Y,spx_word32_t * prod,int N)331 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
332 {
333    int i, j;
334    spx_float_t W;
335    W = FLOAT_AMULT(p, w[0]);
336    prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
337    for (i=1,j=1;i<N-1;i+=2,j++)
338    {
339       W = FLOAT_AMULT(p, w[j]);
340       prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
341       prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
342    }
343    W = FLOAT_AMULT(p, w[j]);
344    prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
345 }
346 
mdf_adjust_prop(const spx_word32_t * W,int N,int M,int P,spx_word16_t * prop)347 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
348 {
349    int i, j, p;
350    spx_word16_t max_sum = 1;
351    spx_word32_t prop_sum = 1;
352    for (i=0;i<M;i++)
353    {
354       spx_word32_t tmp = 1;
355       for (p=0;p<P;p++)
356          for (j=0;j<N;j++)
357             tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
358 #ifdef FIXED_POINT
359       /* Just a security in case an overflow were to occur */
360       tmp = MIN32(ABS32(tmp), 536870912);
361 #endif
362       prop[i] = spx_sqrt(tmp);
363       if (prop[i] > max_sum)
364          max_sum = prop[i];
365    }
366    for (i=0;i<M;i++)
367    {
368       prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
369       prop_sum += EXTEND32(prop[i]);
370    }
371    for (i=0;i<M;i++)
372    {
373       prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
374       /*printf ("%f ", prop[i]);*/
375    }
376    /*printf ("\n");*/
377 }
378 
379 #ifdef DUMP_ECHO_CANCEL_DATA
380 #include <stdio.h>
381 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
382 
dump_audio(const spx_int16_t * rec,const spx_int16_t * play,const spx_int16_t * out,int len)383 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
384 {
385    if (!(rFile && pFile && oFile))
386    {
387       speex_fatal("Dump files not open");
388    }
389    fwrite(rec, sizeof(spx_int16_t), len, rFile);
390    fwrite(play, sizeof(spx_int16_t), len, pFile);
391    fwrite(out, sizeof(spx_int16_t), len, oFile);
392 }
393 #endif
394 
395 /** Creates a new echo canceller state */
speex_echo_state_init(int frame_size,int filter_length)396 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
397 {
398    return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
399 }
400 
speex_echo_state_init_mc(int frame_size,int filter_length,int nb_mic,int nb_speakers)401 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
402 {
403    int i,N,M, C, K;
404    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
405 
406    st->K = nb_speakers;
407    st->C = nb_mic;
408    C=st->C;
409    K=st->K;
410 #ifdef DUMP_ECHO_CANCEL_DATA
411    if (rFile || pFile || oFile)
412       speex_fatal("Opening dump files twice");
413    rFile = fopen("aec_rec.sw", "wb");
414    pFile = fopen("aec_play.sw", "wb");
415    oFile = fopen("aec_out.sw", "wb");
416 #endif
417 
418    st->frame_size = frame_size;
419    st->window_size = 2*frame_size;
420    N = st->window_size;
421    M = st->M = (filter_length+st->frame_size-1)/frame_size;
422    st->cancel_count=0;
423    st->sum_adapt = 0;
424    st->saturated = 0;
425    st->screwed_up = 0;
426    /* This is the default sampling rate */
427    st->sampling_rate = 8000;
428    st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
429 #ifdef FIXED_POINT
430    st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
431    st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
432 #else
433    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
434    st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
435 #endif
436    st->leak_estimate = 0;
437 
438    st->fft_table = spx_fft_init(N);
439 
440    st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
441    st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
442    st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
443    st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
444    st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
445    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
446    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
447    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
448    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
449    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
450 
451    st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
452    st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
453    st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
454    st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
455 #ifdef TWO_PATH
456    st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
457 #endif
458    st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
459    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
460    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
461    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
462    st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
463    st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
464 #ifdef FIXED_POINT
465    st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
466    for (i=0;i<N>>1;i++)
467    {
468       st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
469       st->window[N-i-1] = st->window[i];
470    }
471 #else
472    for (i=0;i<N;i++)
473       st->window[i] = .5-.5*cos(2*M_PI*i/N);
474 #endif
475    for (i=0;i<=st->frame_size;i++)
476       st->power_1[i] = FLOAT_ONE;
477    for (i=0;i<N*M*K*C;i++)
478       st->W[i] = 0;
479    {
480       spx_word32_t sum = 0;
481       /* Ratio of ~10 between adaptation rate of first and last block */
482       spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
483       st->prop[0] = QCONST16(.7, 15);
484       sum = EXTEND32(st->prop[0]);
485       for (i=1;i<M;i++)
486       {
487          st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
488          sum = ADD32(sum, EXTEND32(st->prop[i]));
489       }
490       for (i=M-1;i>=0;i--)
491       {
492          st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
493       }
494    }
495 
496    st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
497    st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
498    st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
499    st->preemph = QCONST16(.9,15);
500    if (st->sampling_rate<12000)
501       st->notch_radius = QCONST16(.9, 15);
502    else if (st->sampling_rate<24000)
503       st->notch_radius = QCONST16(.982, 15);
504    else
505       st->notch_radius = QCONST16(.992, 15);
506 
507    st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
508    st->adapted = 0;
509    st->Pey = st->Pyy = FLOAT_ONE;
510 
511 #ifdef TWO_PATH
512    st->Davg1 = st->Davg2 = 0;
513    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
514 #endif
515 
516    st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
517    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
518    st->play_buf_started = 0;
519 
520    return st;
521 }
522 
523 /** Resets echo canceller state */
speex_echo_state_reset(SpeexEchoState * st)524 EXPORT void speex_echo_state_reset(SpeexEchoState *st)
525 {
526    int i, M, N, C, K;
527    st->cancel_count=0;
528    st->screwed_up = 0;
529    N = st->window_size;
530    M = st->M;
531    C=st->C;
532    K=st->K;
533    for (i=0;i<N*M;i++)
534       st->W[i] = 0;
535 #ifdef TWO_PATH
536    for (i=0;i<N*M;i++)
537       st->foreground[i] = 0;
538 #endif
539    for (i=0;i<N*(M+1);i++)
540       st->X[i] = 0;
541    for (i=0;i<=st->frame_size;i++)
542    {
543       st->power[i] = 0;
544       st->power_1[i] = FLOAT_ONE;
545       st->Eh[i] = 0;
546       st->Yh[i] = 0;
547    }
548    for (i=0;i<st->frame_size;i++)
549    {
550       st->last_y[i] = 0;
551    }
552    for (i=0;i<N*C;i++)
553    {
554       st->E[i] = 0;
555    }
556    for (i=0;i<N*K;i++)
557    {
558       st->x[i] = 0;
559    }
560    for (i=0;i<2*C;i++)
561       st->notch_mem[i] = 0;
562    for (i=0;i<C;i++)
563       st->memD[i]=st->memE[i]=0;
564    for (i=0;i<K;i++)
565       st->memX[i]=0;
566 
567    st->saturated = 0;
568    st->adapted = 0;
569    st->sum_adapt = 0;
570    st->Pey = st->Pyy = FLOAT_ONE;
571 #ifdef TWO_PATH
572    st->Davg1 = st->Davg2 = 0;
573    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
574 #endif
575    for (i=0;i<3*st->frame_size;i++)
576       st->play_buf[i] = 0;
577    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
578    st->play_buf_started = 0;
579 
580 }
581 
582 /** Destroys an echo canceller state */
speex_echo_state_destroy(SpeexEchoState * st)583 EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
584 {
585    spx_fft_destroy(st->fft_table);
586 
587    speex_free(st->e);
588    speex_free(st->x);
589    speex_free(st->input);
590    speex_free(st->y);
591    speex_free(st->last_y);
592    speex_free(st->Yf);
593    speex_free(st->Rf);
594    speex_free(st->Xf);
595    speex_free(st->Yh);
596    speex_free(st->Eh);
597 
598    speex_free(st->X);
599    speex_free(st->Y);
600    speex_free(st->E);
601    speex_free(st->W);
602 #ifdef TWO_PATH
603    speex_free(st->foreground);
604 #endif
605    speex_free(st->PHI);
606    speex_free(st->power);
607    speex_free(st->power_1);
608    speex_free(st->window);
609    speex_free(st->prop);
610    speex_free(st->wtmp);
611 #ifdef FIXED_POINT
612    speex_free(st->wtmp2);
613 #endif
614    speex_free(st->memX);
615    speex_free(st->memD);
616    speex_free(st->memE);
617    speex_free(st->notch_mem);
618 
619    speex_free(st->play_buf);
620    speex_free(st);
621 
622 #ifdef DUMP_ECHO_CANCEL_DATA
623    fclose(rFile);
624    fclose(pFile);
625    fclose(oFile);
626    rFile = pFile = oFile = NULL;
627 #endif
628 }
629 
speex_echo_capture(SpeexEchoState * st,const spx_int16_t * rec,spx_int16_t * out)630 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
631 {
632    int i;
633    /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
634    st->play_buf_started = 1;
635    if (st->play_buf_pos>=st->frame_size)
636    {
637       speex_echo_cancellation(st, rec, st->play_buf, out);
638       st->play_buf_pos -= st->frame_size;
639       for (i=0;i<st->play_buf_pos;i++)
640          st->play_buf[i] = st->play_buf[i+st->frame_size];
641    } else {
642       speex_warning("No playback frame available (your application is buggy and/or got xruns)");
643       if (st->play_buf_pos!=0)
644       {
645          speex_warning("internal playback buffer corruption?");
646          st->play_buf_pos = 0;
647       }
648       for (i=0;i<st->frame_size;i++)
649          out[i] = rec[i];
650    }
651 }
652 
speex_echo_playback(SpeexEchoState * st,const spx_int16_t * play)653 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
654 {
655    /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
656    if (!st->play_buf_started)
657    {
658       speex_warning("discarded first playback frame");
659       return;
660    }
661    if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
662    {
663       int i;
664       for (i=0;i<st->frame_size;i++)
665          st->play_buf[st->play_buf_pos+i] = play[i];
666       st->play_buf_pos += st->frame_size;
667       if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
668       {
669          speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
670          for (i=0;i<st->frame_size;i++)
671             st->play_buf[st->play_buf_pos+i] = play[i];
672          st->play_buf_pos += st->frame_size;
673       }
674    } else {
675       speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
676    }
677 }
678 
679 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
speex_echo_cancel(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out,spx_int32_t * Yout)680 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
681 {
682    speex_echo_cancellation(st, in, far_end, out);
683 }
684 
685 /** Performs echo cancellation on a frame */
speex_echo_cancellation(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out)686 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
687 {
688    int i,j, chan, speak;
689    int N,M, C, K;
690    spx_word32_t Syy,See,Sxx,Sdd, Sff;
691 #ifdef TWO_PATH
692    spx_word32_t Dbf;
693    int update_foreground;
694 #endif
695    spx_word32_t Sey;
696    spx_word16_t ss, ss_1;
697    spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
698    spx_float_t alpha, alpha_1;
699    spx_word16_t RER;
700    spx_word32_t tmp32;
701 
702    N = st->window_size;
703    M = st->M;
704    C = st->C;
705    K = st->K;
706 
707    st->cancel_count++;
708 #ifdef FIXED_POINT
709    ss=DIV32_16(11469,M);
710    ss_1 = SUB16(32767,ss);
711 #else
712    ss=.35/M;
713    ss_1 = 1-ss;
714 #endif
715 
716    for (chan = 0; chan < C; chan++)
717    {
718       /* Apply a notch filter to make sure DC doesn't end up causing problems */
719       filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
720       /* Copy input data to buffer and apply pre-emphasis */
721       /* Copy input data to buffer */
722       for (i=0;i<st->frame_size;i++)
723       {
724          spx_word32_t tmp32;
725          /* FIXME: This core has changed a bit, need to merge properly */
726          tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
727 #ifdef FIXED_POINT
728          if (tmp32 > 32767)
729          {
730             tmp32 = 32767;
731             if (st->saturated == 0)
732                st->saturated = 1;
733          }
734          if (tmp32 < -32767)
735          {
736             tmp32 = -32767;
737             if (st->saturated == 0)
738                st->saturated = 1;
739          }
740 #endif
741          st->memD[chan] = st->input[chan*st->frame_size+i];
742          st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
743       }
744    }
745 
746    for (speak = 0; speak < K; speak++)
747    {
748       for (i=0;i<st->frame_size;i++)
749       {
750          spx_word32_t tmp32;
751          st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
752          tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
753 #ifdef FIXED_POINT
754          /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
755          if (tmp32 > 32767)
756          {
757             tmp32 = 32767;
758             st->saturated = M+1;
759          }
760          if (tmp32 < -32767)
761          {
762             tmp32 = -32767;
763             st->saturated = M+1;
764          }
765 #endif
766          st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
767          st->memX[speak] = far_end[i*K+speak];
768       }
769    }
770 
771    for (speak = 0; speak < K; speak++)
772    {
773       /* Shift memory: this could be optimized eventually*/
774       for (j=M-1;j>=0;j--)
775       {
776          for (i=0;i<N;i++)
777             st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
778       }
779       /* Convert x (echo input) to frequency domain */
780       spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
781    }
782 
783    Sxx = 0;
784    for (speak = 0; speak < K; speak++)
785    {
786       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
787       power_spectrum_accum(st->X+speak*N, st->Xf, N);
788    }
789 
790    Sff = 0;
791    for (chan = 0; chan < C; chan++)
792    {
793 #ifdef TWO_PATH
794       /* Compute foreground filter */
795       spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
796       spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
797       for (i=0;i<st->frame_size;i++)
798          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
799       Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
800 #endif
801    }
802 
803    /* Adjust proportional adaption rate */
804    /* FIXME: Adjust that for C, K*/
805    if (st->adapted)
806       mdf_adjust_prop (st->W, N, M, C*K, st->prop);
807    /* Compute weight gradient */
808    if (st->saturated == 0)
809    {
810       for (chan = 0; chan < C; chan++)
811       {
812          for (speak = 0; speak < K; speak++)
813          {
814             for (j=M-1;j>=0;j--)
815             {
816                weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
817                for (i=0;i<N;i++)
818                   st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
819             }
820          }
821       }
822    } else {
823       st->saturated--;
824    }
825 
826    /* FIXME: MC conversion required */
827    /* Update weight to prevent circular convolution (MDF / AUMDF) */
828    for (chan = 0; chan < C; chan++)
829    {
830       for (speak = 0; speak < K; speak++)
831       {
832          for (j=0;j<M;j++)
833          {
834             /* This is a variant of the Alternatively Updated MDF (AUMDF) */
835             /* Remove the "if" to make this an MDF filter */
836             if (j==0 || st->cancel_count%(M-1) == j-1)
837             {
838 #ifdef FIXED_POINT
839                for (i=0;i<N;i++)
840                   st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
841                spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
842                for (i=0;i<st->frame_size;i++)
843                {
844                   st->wtmp[i]=0;
845                }
846                for (i=st->frame_size;i<N;i++)
847                {
848                   st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
849                }
850                spx_fft(st->fft_table, st->wtmp, st->wtmp2);
851                /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
852                for (i=0;i<N;i++)
853                   st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
854 #else
855                spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
856                for (i=st->frame_size;i<N;i++)
857                {
858                   st->wtmp[i]=0;
859                }
860                spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
861 #endif
862             }
863          }
864       }
865    }
866 
867    /* So we can use power_spectrum_accum */
868    for (i=0;i<=st->frame_size;i++)
869       st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
870 
871    Dbf = 0;
872    See = 0;
873 #ifdef TWO_PATH
874    /* Difference in response, this is used to estimate the variance of our residual power estimate */
875    for (chan = 0; chan < C; chan++)
876    {
877       spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
878       spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
879       for (i=0;i<st->frame_size;i++)
880          st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
881       Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
882       for (i=0;i<st->frame_size;i++)
883          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
884       See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
885    }
886 #endif
887 
888 #ifndef TWO_PATH
889    Sff = See;
890 #endif
891 
892 #ifdef TWO_PATH
893    /* Logic for updating the foreground filter */
894 
895    /* For two time windows, compute the mean of the energy difference, as well as the variance */
896    st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
897    st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
898    st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
899    st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
900 
901    /* Equivalent float code:
902    st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
903    st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
904    st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
905    st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
906    */
907 
908    update_foreground = 0;
909    /* Check if we have a statistically significant reduction in the residual echo */
910    /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
911    if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
912       update_foreground = 1;
913    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
914       update_foreground = 1;
915    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
916       update_foreground = 1;
917 
918    /* Do we update? */
919    if (update_foreground)
920    {
921       st->Davg1 = st->Davg2 = 0;
922       st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
923       /* Copy background filter to foreground filter */
924       for (i=0;i<N*M*C*K;i++)
925          st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
926       /* Apply a smooth transition so as to not introduce blocking artifacts */
927       for (chan = 0; chan < C; chan++)
928          for (i=0;i<st->frame_size;i++)
929             st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
930    } else {
931       int reset_background=0;
932       /* Otherwise, check if the background filter is significantly worse */
933       if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
934          reset_background = 1;
935       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
936          reset_background = 1;
937       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
938          reset_background = 1;
939       if (reset_background)
940       {
941          /* Copy foreground filter to background filter */
942          for (i=0;i<N*M*C*K;i++)
943             st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
944          /* We also need to copy the output so as to get correct adaptation */
945          for (chan = 0; chan < C; chan++)
946          {
947             for (i=0;i<st->frame_size;i++)
948                st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
949             for (i=0;i<st->frame_size;i++)
950                st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
951          }
952          See = Sff;
953          st->Davg1 = st->Davg2 = 0;
954          st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
955       }
956    }
957 #endif
958 
959    Sey = Syy = Sdd = 0;
960    for (chan = 0; chan < C; chan++)
961    {
962       /* Compute error signal (for the output with de-emphasis) */
963       for (i=0;i<st->frame_size;i++)
964       {
965          spx_word32_t tmp_out;
966 #ifdef TWO_PATH
967          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
968 #else
969          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
970 #endif
971          tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
972       /* This is an arbitrary test for saturation in the microphone signal */
973          if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
974          {
975          if (st->saturated == 0)
976             st->saturated = 1;
977          }
978          out[i*C+chan] = WORD2INT(tmp_out);
979          st->memE[chan] = tmp_out;
980       }
981 
982 #ifdef DUMP_ECHO_CANCEL_DATA
983       dump_audio(in, far_end, out, st->frame_size);
984 #endif
985 
986       /* Compute error signal (filter update version) */
987       for (i=0;i<st->frame_size;i++)
988       {
989          st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
990          st->e[chan*N+i] = 0;
991       }
992 
993       /* Compute a bunch of correlations */
994       /* FIXME: bad merge */
995       Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
996       Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
997       Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
998 
999       /* Convert error to frequency domain */
1000       spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1001       for (i=0;i<st->frame_size;i++)
1002          st->y[i+chan*N] = 0;
1003       spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1004 
1005       /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1006       power_spectrum_accum(st->E+chan*N, st->Rf, N);
1007       power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1008 
1009    }
1010 
1011    /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1012 
1013    /* Do some sanity check */
1014    if (!(Syy>=0 && Sxx>=0 && See >= 0)
1015 #ifndef FIXED_POINT
1016        || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1017 #endif
1018       )
1019    {
1020       /* Things have gone really bad */
1021       st->screwed_up += 50;
1022       for (i=0;i<st->frame_size*C;i++)
1023          out[i] = 0;
1024    } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1025    {
1026       /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1027       st->screwed_up++;
1028    } else {
1029       /* Everything's fine */
1030       st->screwed_up=0;
1031    }
1032    if (st->screwed_up>=50)
1033    {
1034       speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1035       speex_echo_state_reset(st);
1036       return;
1037    }
1038 
1039    /* Add a small noise floor to make sure not to have problems when dividing */
1040    See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1041 
1042    for (speak = 0; speak < K; speak++)
1043    {
1044       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1045       power_spectrum_accum(st->X+speak*N, st->Xf, N);
1046    }
1047 
1048 
1049    /* Smooth far end energy estimate over time */
1050    for (j=0;j<=st->frame_size;j++)
1051       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1052 
1053    /* Compute filtered spectra and (cross-)correlations */
1054    for (j=st->frame_size;j>=0;j--)
1055    {
1056       spx_float_t Eh, Yh;
1057       Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1058       Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1059       Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1060       Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1061 #ifdef FIXED_POINT
1062       st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1063       st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1064 #else
1065       st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1066       st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1067 #endif
1068    }
1069 
1070    Pyy = FLOAT_SQRT(Pyy);
1071    Pey = FLOAT_DIVU(Pey,Pyy);
1072 
1073    /* Compute correlation updatete rate */
1074    tmp32 = MULT16_32_Q15(st->beta0,Syy);
1075    if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1076       tmp32 = MULT16_32_Q15(st->beta_max,See);
1077    alpha = FLOAT_DIV32(tmp32, See);
1078    alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1079    /* Update correlations (recursive average) */
1080    st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1081    st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1082    if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1083       st->Pyy = FLOAT_ONE;
1084    /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1085    if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1086       st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1087    if (FLOAT_GT(st->Pey, st->Pyy))
1088       st->Pey = st->Pyy;
1089    /* leak_estimate is the linear regression result */
1090    st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1091    /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1092    if (st->leak_estimate > 16383)
1093       st->leak_estimate = 32767;
1094    else
1095       st->leak_estimate = SHL16(st->leak_estimate,1);
1096    /*printf ("%f\n", st->leak_estimate);*/
1097 
1098    /* Compute Residual to Error Ratio */
1099 #ifdef FIXED_POINT
1100    tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1101    tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1102    /* Check for y in e (lower bound on RER) */
1103    {
1104       spx_float_t bound = PSEUDOFLOAT(Sey);
1105       bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1106       if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1107          tmp32 = See;
1108       else if (tmp32 < FLOAT_EXTRACT32(bound))
1109          tmp32 = FLOAT_EXTRACT32(bound);
1110    }
1111    if (tmp32 > SHR32(See,1))
1112       tmp32 = SHR32(See,1);
1113    RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1114 #else
1115    RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1116    /* Check for y in e (lower bound on RER) */
1117    if (RER < Sey*Sey/(1+See*Syy))
1118       RER = Sey*Sey/(1+See*Syy);
1119    if (RER > .5)
1120       RER = .5;
1121 #endif
1122 
1123    /* We consider that the filter has had minimal adaptation if the following is true*/
1124    if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1125    {
1126       st->adapted = 1;
1127    }
1128 
1129    if (st->adapted)
1130    {
1131       /* Normal learning rate calculation once we're past the minimal adaptation phase */
1132       for (i=0;i<=st->frame_size;i++)
1133       {
1134          spx_word32_t r, e;
1135          /* Compute frequency-domain adaptation mask */
1136          r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1137          e = SHL32(st->Rf[i],3)+1;
1138 #ifdef FIXED_POINT
1139          if (r>SHR32(e,1))
1140             r = SHR32(e,1);
1141 #else
1142          if (r>.5*e)
1143             r = .5*e;
1144 #endif
1145          r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1146          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1147          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1148       }
1149    } else {
1150       /* Temporary adaption rate if filter is not yet adapted enough */
1151       spx_word16_t adapt_rate=0;
1152 
1153       if (Sxx > SHR32(MULT16_16(N, 1000),6))
1154       {
1155          tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1156 #ifdef FIXED_POINT
1157          if (tmp32 > SHR32(See,2))
1158             tmp32 = SHR32(See,2);
1159 #else
1160          if (tmp32 > .25*See)
1161             tmp32 = .25*See;
1162 #endif
1163          adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1164       }
1165       for (i=0;i<=st->frame_size;i++)
1166          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1167 
1168 
1169       /* How much have we adapted so far? */
1170       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1171    }
1172 
1173    /* FIXME: MC conversion required */
1174       for (i=0;i<st->frame_size;i++)
1175          st->last_y[i] = st->last_y[st->frame_size+i];
1176    if (st->adapted)
1177    {
1178       /* If the filter is adapted, take the filtered echo */
1179       for (i=0;i<st->frame_size;i++)
1180          st->last_y[st->frame_size+i] = in[i]-out[i];
1181    } else {
1182       /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1183       /* moved earlier: for (i=0;i<N;i++)
1184       st->last_y[i] = st->x[i];*/
1185    }
1186 
1187 }
1188 
1189 /* Compute spectrum of estimated echo for use in an echo post-filter */
speex_echo_get_residual(SpeexEchoState * st,spx_word32_t * residual_echo,int len)1190 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1191 {
1192    int i;
1193    spx_word16_t leak2;
1194    int N;
1195 
1196    N = st->window_size;
1197 
1198    /* Apply hanning window (should pre-compute it)*/
1199    for (i=0;i<N;i++)
1200       st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1201 
1202    /* Compute power spectrum of the echo */
1203    spx_fft(st->fft_table, st->y, st->Y);
1204    power_spectrum(st->Y, residual_echo, N);
1205 
1206 #ifdef FIXED_POINT
1207    if (st->leak_estimate > 16383)
1208       leak2 = 32767;
1209    else
1210       leak2 = SHL16(st->leak_estimate, 1);
1211 #else
1212    if (st->leak_estimate>.5)
1213       leak2 = 1;
1214    else
1215       leak2 = 2*st->leak_estimate;
1216 #endif
1217    /* Estimate residual echo */
1218    for (i=0;i<=st->frame_size;i++)
1219       residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1220 
1221 }
1222 
speex_echo_ctl(SpeexEchoState * st,int request,void * ptr)1223 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1224 {
1225    switch(request)
1226    {
1227 
1228       case SPEEX_ECHO_GET_FRAME_SIZE:
1229          (*(int*)ptr) = st->frame_size;
1230          break;
1231       case SPEEX_ECHO_SET_SAMPLING_RATE:
1232          st->sampling_rate = (*(int*)ptr);
1233          st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1234 #ifdef FIXED_POINT
1235          st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1236          st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1237 #else
1238          st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1239          st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1240 #endif
1241          if (st->sampling_rate<12000)
1242             st->notch_radius = QCONST16(.9, 15);
1243          else if (st->sampling_rate<24000)
1244             st->notch_radius = QCONST16(.982, 15);
1245          else
1246             st->notch_radius = QCONST16(.992, 15);
1247          break;
1248       case SPEEX_ECHO_GET_SAMPLING_RATE:
1249          (*(int*)ptr) = st->sampling_rate;
1250          break;
1251       case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1252          /*FIXME: Implement this for multiple channels */
1253          *((spx_int32_t *)ptr) = st->M * st->frame_size;
1254          break;
1255       case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1256       {
1257          int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1258          spx_int32_t *filt = (spx_int32_t *) ptr;
1259          for(j=0;j<M;j++)
1260          {
1261             /*FIXME: Implement this for multiple channels */
1262 #ifdef FIXED_POINT
1263             for (i=0;i<N;i++)
1264                st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1265             spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1266 #else
1267             spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1268 #endif
1269             for(i=0;i<n;i++)
1270                filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1271          }
1272       }
1273          break;
1274       default:
1275          speex_warning_int("Unknown speex_echo_ctl request: ", request);
1276          return -1;
1277    }
1278    return 0;
1279 }
1280