1 /* Copyright (C) 2007 Hong Zhiqian */
2 /**
3 @file mdf_tm.h
4 @author Hong Zhiqian
5 @brief Various compatibility routines for Speex (TriMedia version)
6 */
7 /*
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14
15 - Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
18
19 - Neither the name of the Xiph.org Foundation nor the names of its
20 contributors may be used to endorse or promote products derived from
21 this software without specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35 #include <ops/custom_defs.h>
36 #include "profile_tm.h"
37
38 // shifted power spectrum to fftwrap.c so that optimisation can be shared between mdf.c and preprocess.c
39 #define OVERRIDE_POWER_SPECTRUM
40
41 #ifdef FIXED_POINT
42
43 #else
44
45 #define OVERRIDE_FILTER_DC_NOTCH16
filter_dc_notch16(const spx_int16_t * restrict in,float radius,float * restrict out,int len,float * restrict mem)46 void filter_dc_notch16(
47 const spx_int16_t * restrict in,
48 float radius,
49 float * restrict out,
50 int len,
51 float * restrict mem
52 )
53 {
54 register int i;
55 register float den2, r1;
56 register float mem0, mem1;
57
58 FILTERDCNOTCH16_START();
59
60 r1 = 1 - radius;
61 den2 = (radius * radius) + (0.7 * r1 * r1);
62 mem0 = mem[0];
63 mem1 = mem[1];
64
65 #if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
66 #pragma TCS_unroll=4
67 #pragma TCS_unrollexact=1
68 #endif
69 for ( i=0 ; i<len ; ++i )
70 {
71 register float vin = in[i];
72 register float vout = mem0 + vin;
73 register float rvout = radius * vout;
74
75 mem0 = mem1 + 2 * (-vin + rvout);
76 mem1 = vin - (den2 * vout);
77
78 out[i] = rvout;
79 }
80 #if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
81 #pragma TCS_unrollexact=0
82 #pragma TCS_unroll=0
83 #endif
84
85 mem[0] = mem0;
86 mem[1] = mem1;
87
88 FILTERDCNOTCH16_STOP();
89 }
90
91 #define OVERRIDE_MDF_INNER_PROD
mdf_inner_prod(const float * restrict x,const float * restrict y,int len)92 float mdf_inner_prod(
93 const float * restrict x,
94 const float * restrict y,
95 int len
96 )
97 {
98 register float sum = 0;
99
100 MDFINNERPROD_START();
101
102 len >>= 1;
103
104 #if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
105 #pragma TCS_unroll=4
106 #pragma TCS_unrollexact=1
107 #endif
108 while(len--)
109 {
110 register float acc0, acc1;
111
112 acc0 = (*x++) * (*y++);
113 acc1 = (*x++) * (*y++);
114
115 sum += acc0 + acc1;
116 }
117 #if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
118 #pragma TCS_unrollexact=0
119 #pragma TCS_unroll=0
120 #endif
121
122 MDFINNERPROD_STOP();
123
124 return sum;
125 }
126
127 #define OVERRIDE_SPECTRAL_MUL_ACCUM
spectral_mul_accum(const float * restrict X,const float * restrict Y,float * restrict acc,int N,int M)128 void spectral_mul_accum(
129 const float * restrict X,
130 const float * restrict Y,
131 float * restrict acc,
132 int N, int M
133 )
134 {
135 register int i, j;
136 register float Xi, Yi, Xii, Yii;
137 register int _N;
138
139 SPECTRALMULACCUM_START();
140
141 acc[0] = X[0] * Y[0];
142 _N = N-1;
143
144 for ( i=1 ; i<_N ; i+=2 )
145 {
146 Xi = X[i];
147 Yi = Y[i];
148 Xii = X[i+1];
149 Yii = Y[i+1];
150
151 acc[i] = (Xi * Yi - Xii * Yii);
152 acc[i+1]= (Xii * Yi + Xi * Yii);
153 }
154
155 acc[_N] = X[_N] * Y[_N];
156
157 for ( j=1,X+=N,Y+=N ; j<M ; j++ )
158 {
159 acc[0] += X[0] * Y[0];
160
161 for ( i=1 ; i<N-1 ; i+=2 )
162 {
163 Xi = X[i];
164 Yi = Y[i];
165 Xii = X[i+1];
166 Yii = Y[i+1];
167
168 acc[i] += (Xi * Yi - Xii * Yii);
169 acc[i+1]+= (Xii * Yi + Xi * Yii);
170 }
171
172 acc[_N] += X[_N] * Y[_N];
173 X += N;
174 Y += N;
175 }
176
177 SPECTRALMULACCUM_STOP();
178 }
179
180 #define OVERRIDE_WEIGHTED_SPECTRAL_MUL_CONJ
weighted_spectral_mul_conj(const float * restrict w,const float p,const float * restrict X,const float * restrict Y,float * restrict prod,int N)181 void weighted_spectral_mul_conj(
182 const float * restrict w,
183 const float p,
184 const float * restrict X,
185 const float * restrict Y,
186 float * restrict prod,
187 int N
188 )
189 {
190 register int i, j;
191 register int _N;
192
193 WEIGHTEDSPECTRALMULCONJ_START();
194
195 prod[0] = p * w[0] * X[0] * Y[0];
196 _N = N-1;
197
198 for (i=1,j=1;i<_N;i+=2,j++)
199 {
200 register float W;
201 register float Xi, Yi, Xii, Yii;
202
203 Xi = X[i];
204 Yi = Y[i];
205 Xii = X[i+1];
206 Yii = Y[i+1];
207 W = p * w[j];
208
209
210 prod[i] = W * (Xi * Yi + Xii * Yii);
211 prod[i+1]= W * (Xi * Yii - Xii * Yi);
212 }
213
214 prod[_N] = p * w[j] * X[_N] * Y[_N];
215
216 WEIGHTEDSPECTRALMULCONJ_STOP();
217 }
218
219 #define OVERRIDE_MDF_ADJUST_PROP
mdf_adjust_prop(const float * restrict W,int N,int M,float * restrict prop)220 void mdf_adjust_prop(
221 const float * restrict W,
222 int N,
223 int M,
224 float * restrict prop
225 )
226 {
227 register int i, j;
228 register float max_sum = 1;
229 register float prop_sum = 1;
230
231 MDFADJUSTPROP_START();
232
233 for ( i=0 ; i<M ; ++i )
234 {
235 register float tmp = 1;
236 register int k = i * N;
237 register int l = k + N;
238 register float propi;
239
240 #if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
241 #pragma TCS_unroll=4
242 #pragma TCS_unrollexact=1
243 #endif
244 for ( j=k ; j<l ; ++j )
245 {
246 register float wi = W[j];
247
248 tmp += wi * wi;
249 }
250 #if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
251 #pragma TCS_unrollexact=0
252 #pragma TCS_unroll=0
253 #endif
254
255 propi = spx_sqrt(tmp);
256 prop[i]= propi;
257 max_sum= fmux(propi > max_sum, propi, max_sum);
258 }
259
260 for ( i=0 ; i<M ; ++i )
261 {
262 register float propi = prop[i];
263
264 propi += .1f * max_sum;
265 prop_sum += propi;
266 prop[i] = propi;
267 }
268
269 prop_sum = 0.99f / prop_sum;
270 for ( i=0 ; i<M ; ++i )
271 { prop[i] = prop_sum * prop[i];
272 }
273
274 MDFADJUSTPROP_STOP();
275 }
276
277
278 #define OVERRIDE_SPEEX_ECHO_GET_RESIDUAL
speex_echo_get_residual(SpeexEchoState * restrict st,float * restrict residual_echo,int len)279 void speex_echo_get_residual(
280 SpeexEchoState * restrict st,
281 float * restrict residual_echo,
282 int len
283 )
284 {
285 register int i;
286 register float leak2, leake;
287 register int N;
288 register float * restrict window;
289 register float * restrict last_y;
290 register float * restrict y;
291
292 SPEEXECHOGETRESIDUAL_START();
293
294 window = st->window;
295 last_y = st->last_y;
296 y = st->y;
297 N = st->window_size;
298
299
300 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
301 #pragma TCS_unroll=4
302 #pragma TCS_unrollexact=1
303 #endif
304 for (i=0;i<N;i++)
305 { y[i] = window[i] * last_y[i];
306 }
307 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
308 #pragma TCS_unrollexact=0
309 #pragma TCS_unroll=0
310 #endif
311
312 spx_fft(st->fft_table, st->y, st->Y);
313 power_spectrum(st->Y, residual_echo, N);
314
315 leake = st->leak_estimate;
316 leak2 = fmux(leake > .5, 1, 2 * leake);
317 N = st->frame_size;
318
319 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
320 #pragma TCS_unroll=4
321 #pragma TCS_unrollexact=1
322 #endif
323 for ( i=0 ; i<N ; ++i )
324 { residual_echo[i] *= leak2;
325 }
326 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
327 #pragma TCS_unrollexact=0
328 #pragma TCS_unroll=0
329 #endif
330
331 residual_echo[N] *= leak2;
332
333 #ifndef NO_REMARK
334 (void)len;
335 #endif
336
337 SPEEXECHOGETRESIDUAL_STOP();
338 }
339 #endif
340
341
mdf_preemph(SpeexEchoState * restrict st,spx_word16_t * restrict x,const spx_int16_t * restrict far_end,int framesize)342 void mdf_preemph(
343 SpeexEchoState * restrict st,
344 spx_word16_t * restrict x,
345 const spx_int16_t * restrict far_end,
346 int framesize
347 )
348 {
349 register spx_word16_t preemph = st->preemph;
350 register spx_word16_t memX = st->memX;
351 register spx_word16_t memD = st->memD;
352 register spx_word16_t * restrict input = st->input;
353 register int i;
354 #ifdef FIXED_POINT
355 register int saturated = st->saturated;
356 #endif
357
358 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
359 #pragma TCS_unroll=4
360 #pragma TCS_unrollexact=1
361 #endif
362 for ( i=0 ; i<framesize ; ++i )
363 {
364 register spx_int16_t far_endi = far_end[i];
365 register spx_word32_t tmp32;
366 register spx_word16_t inputi = input[i];
367
368 tmp32 = SUB32(EXTEND32(far_endi), EXTEND32(MULT16_16_P15(preemph,memX)));
369
370 #ifdef FIXED_POINT
371 saturated = mux(iabs(tmp32) > 32767, M+1, saturated);
372 tmp32 = iclipi(tmp32,32767);
373 #endif
374
375 x[i] = EXTRACT16(tmp32);
376 memX = far_endi;
377 tmp32 = SUB32(EXTEND32(inputi), EXTEND32(MULT16_16_P15(preemph, memD)));
378
379 #ifdef FIXED_POINT
380 saturated = mux( ((tmp32 > 32767) && (saturated == 0)), 1,
381 mux( ((tmp32 <-32767) && (saturated == 0)), 1, saturated ));
382 tmp32 = iclipi(tmp32,32767);
383 #endif
384 memD = inputi;
385 input[i] = tmp32;
386 }
387 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
388 #pragma TCS_unrollexact=0
389 #pragma TCS_unroll=0
390 #endif
391
392 st->memD = memD;
393 st->memX = memX;
394
395 #ifdef FIXED_POINT
396 st->saturated = saturated;
397 #endif
398 }
399
mdf_sub(spx_word16_t * restrict dest,const spx_word16_t * restrict src1,const spx_word16_t * restrict src2,int framesize)400 void mdf_sub(
401 spx_word16_t * restrict dest,
402 const spx_word16_t * restrict src1,
403 const spx_word16_t * restrict src2,
404 int framesize
405 )
406 {
407 register int i;
408
409 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
410 #pragma TCS_unroll=4
411 #pragma TCS_unrollexact=1
412 #endif
413
414 #ifdef FIXED_POINT
415 for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
416 { register int src1i, src2i, desti;
417
418 src1i = ld32d(src1,i);
419 src2i = ld32d(src2,i);
420 desti = dspidualsub(src1i,src2i);
421 st32d(i, dest, desti);
422 }
423 #else
424 for ( i=0 ; i<framesize ; ++i )
425 { dest[i] = src1[i] - src2[i];
426 }
427 #endif
428
429 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
430 #pragma TCS_unrollexact=0
431 #pragma TCS_unroll=0
432 #endif
433 }
434
mdf_sub_int(spx_word16_t * restrict dest,const spx_int16_t * restrict src1,const spx_int16_t * restrict src2,int framesize)435 void mdf_sub_int(
436 spx_word16_t * restrict dest,
437 const spx_int16_t * restrict src1,
438 const spx_int16_t * restrict src2,
439 int framesize
440 )
441 {
442 register int i, j;
443
444 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
445 #pragma TCS_unroll=4
446 #pragma TCS_unrollexact=1
447 #endif
448
449 #ifdef FIXED_POINT
450 for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
451 { register int src1i, src2i, desti;
452
453 src1i = ld32d(src1,i);
454 src2i = ld32d(src2,i);
455 desti = dspidualsub(src1i,src2i);
456 st32d(i, dest, desti);
457 }
458 #else
459 for ( i=0,j=0 ; i<framesize ; i+=2,++j )
460 { register int src1i, src2i, desti;
461
462
463 src1i = ld32d(src1,j);
464 src2i = ld32d(src2,j);
465 desti = dspidualsub(src1i,src2i);
466
467 dest[i] = sex16(desti);
468 dest[i+1] = asri(16,desti);
469 }
470 #endif
471
472 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
473 #pragma TCS_unrollexact=0
474 #pragma TCS_unroll=0
475 #endif
476 }
477
mdf_compute_weight_gradient(SpeexEchoState * restrict st,spx_word16_t * restrict X,int N,int M)478 void mdf_compute_weight_gradient(
479 SpeexEchoState * restrict st,
480 spx_word16_t * restrict X,
481 int N,
482 int M
483 )
484 {
485 register int i, j;
486 register spx_word32_t * restrict PHI = st->PHI;
487
488 for (j=M-1;j>=0;j--)
489 {
490 register spx_word32_t * restrict W = &(st->W[j*N]);
491
492 weighted_spectral_mul_conj(
493 st->power_1,
494 FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15),
495 &X[(j+1)*N],
496 st->E,
497 st->PHI,
498 N);
499 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
500 #pragma TCS_unroll=4
501 #pragma TCS_unrollexact=1
502 #endif
503 for (i=0;i<N;i++)
504 { W[i] = ADD32(W[i],PHI[i]);
505 }
506 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
507 #pragma TCS_unrollexact=0
508 #pragma TCS_unroll=0
509 #endif
510 }
511 }
512
mdf_update_weight(SpeexEchoState * restrict st,int N,int M,int framesize)513 void mdf_update_weight(
514 SpeexEchoState * restrict st,
515 int N,
516 int M,
517 int framesize
518 )
519 {
520 register int j;
521 register int cancel_count = st->cancel_count;
522 register spx_word16_t * restrict wtmp = st->wtmp;
523 #ifdef FIXED_POINT
524 register spx_word16_t * restrict wtmp2 = st->wtmp2;
525 register int i;
526 #endif
527
528 for ( j=0 ; j<M ; j++ )
529 {
530 register spx_word32_t * restrict W = &(st->W[j*N]);
531
532 if (j==0 || cancel_count%(M-1) == j-1)
533 {
534 #ifdef FIXED_POINT
535
536 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
537 #pragma TCS_unroll=4
538 #pragma TCS_unrollexact=1
539 #endif
540 for ( i=0 ; i<N ; i++ )
541 wtmp2[i] = EXTRACT16(PSHR32(W[i],NORMALIZE_SCALEDOWN+16));
542 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
543 #pragma TCS_unrollexact=0
544 #pragma TCS_unroll=0
545 #endif
546 spx_ifft(st->fft_table, wtmp2, wtmp);
547 memset(wtmp, 0, framesize * sizeof(spx_word16_t));
548
549 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
550 #pragma TCS_unroll=4
551 #pragma TCS_unrollexact=1
552 #endif
553 for (j=framesize; j<N ; ++j)
554 { wtmp[j]=SHL16(wtmp[j],NORMALIZE_SCALEUP);
555 }
556 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
557 #pragma TCS_unrollexact=0
558 #pragma TCS_unroll=0
559 #endif
560
561 spx_fft(st->fft_table, wtmp, wtmp2);
562
563 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
564 #pragma TCS_unroll=4
565 #pragma TCS_unrollexact=1
566 #endif
567 for (i=0;i<N;i++)
568 { W[i] -= SHL32(EXTEND32(wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
569 }
570 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
571 #pragma TCS_unrollexact=0
572 #pragma TCS_unroll=0
573 #endif
574
575 #else
576 spx_ifft(st->fft_table, W, wtmp);
577 memset(&wtmp[framesize], 0, (N-framesize) * sizeof(spx_word16_t));
578 spx_fft(st->fft_table, wtmp, W);
579 #endif
580 }
581 }
582 }
583
584 #ifdef TWO_PATH
585 // first four parameters is passed by registers
586 // generate faster performance with 4 parameters functions
mdf_update_foreground(SpeexEchoState * restrict st,spx_word32_t Dbf,spx_word32_t Sff,spx_word32_t See)587 spx_word32_t mdf_update_foreground(
588 SpeexEchoState * restrict st,
589 spx_word32_t Dbf,
590 spx_word32_t Sff,
591 spx_word32_t See
592 )
593 {
594 register spx_word32_t Davg1 = st->Davg1;
595 register spx_word32_t Davg2 = st->Davg2;
596 register spx_word32_t Dvar1 = st->Dvar1;
597 register spx_word32_t Dvar2 = st->Dvar2;
598 register spx_word16_t * restrict input = st->input;
599 register int framesize = st->frame_size;
600 register spx_word16_t * restrict xx = st->x + framesize;
601 register spx_word16_t * restrict y = st->y + framesize;
602 register spx_word16_t * restrict ee = st->e + framesize;
603 register int update_foreground;
604 register int i;
605 register int N = st->window_size;
606 register int M = st->M;
607
608 #ifdef FIXED_POINT
609 register spx_word32_t sc0 = SUB32(Sff,See);
610 register spx_float_t sc1 = FLOAT_MUL32U(Sff,Dbf);
611
612 Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),Davg1), MULT16_32_Q15(QCONST16(.4f,15),sc0));
613 Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),Davg2), MULT16_32_Q15(QCONST16(.15f,15),sc0));
614 Dvar1 = FLOAT_ADD(
615 FLOAT_MULT(VAR1_SMOOTH,Dvar1),
616 FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff),
617 MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
618 Dvar2 = FLOAT_ADD(
619 FLOAT_MULT(VAR2_SMOOTH,Dvar2),
620 FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff),
621 MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
622 #else
623 register spx_word32_t sc0 = Sff - See;
624 register spx_word32_t sc1 = Sff * Dbf;
625
626 Davg1 = .6*Davg1 + .4*sc0;
627 Davg2 = .85*Davg2 + .15*sc0;
628 Dvar1 = VAR1_SMOOTH*Dvar1 + .16*sc1;
629 Dvar2 = VAR2_SMOOTH*Dvar2 + .0225*sc1;
630 #endif
631
632 update_foreground =
633 mux( FLOAT_GT(FLOAT_MUL32U(sc0, VABS(sc0)), sc1), 1,
634 mux( FLOAT_GT(FLOAT_MUL32U(Davg1, VABS(Davg1)), FLOAT_MULT(VAR1_UPDATE,(Dvar1))), 1,
635 mux( FLOAT_GT(FLOAT_MUL32U(Davg2, VABS(Davg2)), FLOAT_MULT(VAR2_UPDATE,(Dvar2))), 1, 0)));
636
637 if ( update_foreground )
638 {
639 register spx_word16_t * restrict windowf = st->window + framesize;
640 register spx_word16_t * restrict window = st->window;
641
642 st->Davg1 = st->Davg2 = 0;
643 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
644
645 memcpy(st->foreground, st->W, N*M*sizeof(spx_word32_t));
646
647 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
648 #pragma TCS_unroll=4
649 #pragma TCS_unrollexact=1
650 #endif
651 for ( i=0 ; i<framesize ; ++i)
652 { register spx_word16_t wi = window[i];
653 register spx_word16_t wfi = windowf[i];
654 register spx_word16_t ei = ee[i];
655 register spx_word16_t yi = y[i];
656
657 ee[i] = MULT16_16_Q15(wfi,ei) + MULT16_16_Q15(wi,yi);
658 }
659 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
660 #pragma TCS_unrollexact=0
661 #pragma TCS_unroll=0
662 #endif
663
664 } else
665 {
666 register int reset_background;
667
668 reset_background =
669 mux( FLOAT_GT(FLOAT_MUL32U(-(sc0),VABS(sc0)), FLOAT_MULT(VAR_BACKTRACK,sc1)), 1,
670 mux( FLOAT_GT(FLOAT_MUL32U(-(Davg1), VABS(Davg1)), FLOAT_MULT(VAR_BACKTRACK,Dvar1)), 1,
671 mux( FLOAT_GT(FLOAT_MUL32U(-(Davg2), VABS(Davg2)), FLOAT_MULT(VAR_BACKTRACK,Dvar2)), 1, 0)));
672
673 if ( reset_background )
674 {
675 memcpy(st->W, st->foreground, N*M*sizeof(spx_word32_t));
676 memcpy(y, ee, framesize * sizeof(spx_word16_t));
677 mdf_sub(xx,input,y,framesize);
678 See = Sff;
679 st->Davg1 = st->Davg2 = 0;
680 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
681 } else
682 {
683 st->Davg1 = Davg1;
684 st->Davg2 = Davg2;
685 st->Dvar1 = Dvar1;
686 st->Dvar2 = Dvar2;
687 }
688 }
689
690 return See;
691 }
692 #endif
693
mdf_compute_error_signal(SpeexEchoState * restrict st,const spx_int16_t * restrict in,spx_int16_t * restrict out,int framesize)694 void mdf_compute_error_signal(
695 SpeexEchoState * restrict st,
696 const spx_int16_t * restrict in,
697 spx_int16_t * restrict out,
698 int framesize
699 )
700 {
701 register spx_word16_t preemph = st->preemph;
702 register spx_word16_t memE = st->memE;
703 register int saturated = st->saturated;
704 register spx_word16_t * restrict e = st->e;
705 register spx_word16_t * restrict ee = st->e + framesize;
706 register spx_word16_t * restrict input = st->input;
707 register spx_word16_t * restrict xx = st->x + framesize;
708 register int i;
709
710 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
711 #pragma TCS_unroll=4
712 #pragma TCS_unrollexact=1
713 #endif
714 for ( i=0 ; i<framesize ; ++i )
715 {
716 register spx_word32_t tmp_out;
717 register spx_int16_t ini = in[i];
718 register int flg;
719
720 #ifdef FIXED_POINT
721
722 #ifdef TWO_PATH
723 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
724 tmp_out = iclipi(tmp_out,32767);
725 #else
726 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i]));
727 tmp_out = iclipi(tmp_out,32767);
728 #endif
729
730 #else
731 #ifdef TWO_PATH
732 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
733 #else
734 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i]));
735 #endif
736 tmp_out =
737 fmux( tmp_out > 32767, 32767,
738 fmux( tmp_out < -32768, -32768, tmp_out));
739 #endif
740
741 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(preemph,memE)));
742 flg = iabs(ini) >= 32000;
743 tmp_out = VMUX( flg, 0, tmp_out);
744 saturated = mux( flg && (saturated == 0), 1, saturated);
745
746 out[i] = (spx_int16_t)tmp_out;
747 memE = tmp_out;
748 }
749 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
750 #pragma TCS_unrollexact=0
751 #pragma TCS_unroll=0
752 #endif
753
754 st->memE = memE;
755 st->saturated = saturated;
756 memset(e, 0, framesize * sizeof(spx_word16_t));
757 memcpy(ee, xx, framesize * sizeof(spx_word16_t));
758 }
759
mdf_check(SpeexEchoState * restrict st,spx_int16_t * out,spx_word32_t Syy,spx_word32_t Sxx,spx_word32_t See,spx_word32_t Sff,spx_word32_t Sdd)760 inline int mdf_check(
761 SpeexEchoState * restrict st,
762 spx_int16_t * out,
763 spx_word32_t Syy,
764 spx_word32_t Sxx,
765 spx_word32_t See,
766 spx_word32_t Sff,
767 spx_word32_t Sdd
768 )
769 {
770 register int N = st->window_size;
771 register spx_word32_t N1e9 = N * 1e9;
772 register int screwed_up = st->screwed_up;
773 register int framesize = st->frame_size;
774
775 if (!(Syy>=0 && Sxx>=0 && See >= 0)
776 #ifndef FIXED_POINT
777 || !(Sff < N1e9 && Syy < N1e9 && Sxx < N1e9 )
778 #endif
779 )
780 {
781 screwed_up += 50;
782 memset(out, 0, framesize * sizeof(spx_int16_t));
783
784 } else
785 { screwed_up = mux( SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)), screwed_up+1, 0);
786 }
787
788 st->screwed_up = screwed_up;
789
790 return screwed_up;
791 }
792
mdf_smooth(spx_word32_t * restrict power,spx_word32_t * restrict Xf,int framesize,int M)793 void mdf_smooth(
794 spx_word32_t * restrict power,
795 spx_word32_t * restrict Xf,
796 int framesize,
797 int M
798 )
799 {
800 register spx_word16_t ss, ss_1, pf, xff;
801 register int j;
802
803 #ifdef FIXED_POINT
804 ss=DIV32_16(11469,M);
805 ss_1 = SUB16(32767,ss);
806 #else
807 ss=.35/M;
808 ss_1 = 1-ss;
809 #endif
810
811 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
812 #pragma TCS_unroll=4
813 #pragma TCS_unrollexact=1
814 #endif
815 for ( j=0 ; j<framesize ; ++j )
816 { register spx_word32_t pi = power[j];
817 register spx_word32_t xfi = Xf[j];
818
819 power[j] = MULT16_32_Q15(ss_1,pi) + 1 + MULT16_32_Q15(ss,xfi);
820 }
821 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
822 #pragma TCS_unrollexact=0
823 #pragma TCS_unroll=0
824 #endif
825
826 pf = power[framesize];
827 xff = Xf[framesize];
828 power[framesize] = MULT16_32_Q15(ss_1,pf) + 1 + MULT16_32_Q15(ss,xff);
829 }
830
mdf_compute_filtered_spectra_crosscorrelations(SpeexEchoState * restrict st,spx_word32_t Syy,spx_word32_t See,int framesize)831 void mdf_compute_filtered_spectra_crosscorrelations(
832 SpeexEchoState * restrict st,
833 spx_word32_t Syy,
834 spx_word32_t See,
835 int framesize
836 )
837 {
838 register spx_float_t Pey = FLOAT_ONE;
839 register spx_float_t Pyy = FLOAT_ONE;
840 register spx_word16_t spec_average = st->spec_average;
841 register spx_word32_t * restrict pRf = st->Rf;
842 register spx_word32_t * restrict pYf = st->Yf;
843 register spx_word32_t * restrict pEh = st->Eh;
844 register spx_word32_t * restrict pYh = st->Yh;
845 register spx_word16_t beta0 = st->beta0;
846 register spx_word16_t beta_max = st->beta_max;
847 register spx_float_t alpha, alpha_1;
848 register spx_word32_t tmp32, tmpx;
849 register spx_float_t sPey = st->Pey;
850 register spx_float_t sPyy = st->Pyy;
851 register spx_float_t tmp;
852 register spx_word16_t leak_estimate;
853 register int j;
854 register spx_float_t Eh, Yh;
855 register spx_word32_t _Ehj, _Rfj, _Yfj, _Yhj;
856
857 #ifdef FIXED_POINT
858 register spx_word16_t spec_average1 = SUB16(32767,spec_average);
859 #else
860 register spx_word16_t spec_average1 = 1 - spec_average;
861 #endif
862
863 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
864 #pragma TCS_unroll=4
865 #pragma TCS_unrollexact=1
866 #endif
867 for (j=framesize; j>0 ; --j)
868 {
869 _Ehj = pEh[j];
870 _Rfj = pRf[j];
871 _Yfj = pYf[j];
872 _Yhj = pYh[j];
873
874 Eh = PSEUDOFLOAT(_Rfj - _Ehj);
875 Yh = PSEUDOFLOAT(_Yfj - _Yhj);
876
877 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
878 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
879
880 pEh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
881 pYh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
882 }
883 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
884 #pragma TCS_unrollexact=0
885 #pragma TCS_unroll=0
886 #endif
887 _Ehj = pEh[0];
888 _Rfj = pRf[0];
889 _Yfj = pYf[0];
890 _Yhj = pYh[0];
891
892 Eh = PSEUDOFLOAT(_Rfj - _Ehj);
893 Yh = PSEUDOFLOAT(_Yfj - _Yhj);
894
895 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
896 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
897
898 pEh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
899 pYh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
900
901 Pyy = FLOAT_SQRT(Pyy);
902 Pey = FLOAT_DIVU(Pey,Pyy);
903
904 tmp32 = MULT16_32_Q15(beta0,Syy);
905 tmpx = MULT16_32_Q15(beta_max,See);
906 tmp32 = VMUX(tmp32 > tmpx, tmpx, tmp32);
907 alpha = FLOAT_DIV32(tmp32, See);
908 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
909
910 sPey = FLOAT_ADD(FLOAT_MULT(alpha_1,sPey) , FLOAT_MULT(alpha,Pey));
911 sPyy = FLOAT_ADD(FLOAT_MULT(alpha_1,sPyy) , FLOAT_MULT(alpha,Pyy));
912 tmp = FLOAT_MULT(MIN_LEAK,sPyy);
913
914 #ifndef FIXED_POINT
915 sPyy = VMUX(FLOAT_LT(sPyy, FLOAT_ONE), FLOAT_ONE, sPyy);
916 sPey = VMUX(FLOAT_LT(sPey, tmp), tmp, sPey);
917 sPey = VMUX(FLOAT_LT(sPey, sPyy), sPey, sPyy);
918 #else
919 sPyy = FLOAT_LT(sPyy, FLOAT_ONE) ? FLOAT_ONE : sPyy;
920 sPey = FLOAT_LT(sPey, tmp) ? tmp : sPey;
921 sPey = FLOAT_LT(sPey, sPyy) ? sPey : sPyy;
922 #endif
923
924 leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(sPey, sPyy),14));
925
926 leak_estimate = VMUX( leak_estimate > 16383, 32767, SHL16(leak_estimate,1));
927 st->Pey = sPey;
928 st->Pyy = sPyy;
929 st->leak_estimate = leak_estimate;
930 }
931
mdf_compute_RER(spx_word32_t See,spx_word32_t Syy,spx_word32_t Sey,spx_word32_t Sxx,spx_word16_t leake)932 inline spx_word16_t mdf_compute_RER(
933 spx_word32_t See,
934 spx_word32_t Syy,
935 spx_word32_t Sey,
936 spx_word32_t Sxx,
937 spx_word16_t leake
938 )
939 {
940 register spx_word16_t RER;
941
942 #ifdef FIXED_POINT
943 register spx_word32_t tmp32;
944 register spx_word32_t tmp;
945 spx_float_t bound = PSEUDOFLOAT(Sey);
946
947 tmp32 = MULT16_32_Q15(leake,Syy);
948 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
949
950 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
951 tmp = FLOAT_EXTRACT32(bound);
952 tmp32 = imux( FLOAT_GT(bound, PSEUDOFLOAT(See)), See,
953 imux( tmp32 < tmp, tmp, tmp32));
954
955 tmp = SHR32(See,1);
956 tmp32 = imux(tmp32 > tmp, tmp, tmp32);
957 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
958 #else
959 register spx_word32_t r0;
960
961 r0 = (Sey * Sey)/(1 + See * Syy);
962
963 RER = (.0001*Sxx + 3.* MULT16_32_Q15(leake,Syy)) / See;
964 RER = fmux( RER < r0, r0, RER);
965 RER = fmux( RER > .5, .5, RER);
966 #endif
967
968 return RER;
969 }
970
mdf_adapt(SpeexEchoState * restrict st,spx_word16_t RER,spx_word32_t Syy,spx_word32_t See,spx_word32_t Sxx)971 void mdf_adapt(
972 SpeexEchoState * restrict st,
973 spx_word16_t RER,
974 spx_word32_t Syy,
975 spx_word32_t See,
976 spx_word32_t Sxx
977 )
978 {
979 register spx_float_t * restrict power_1 = st->power_1;
980 register spx_word32_t * restrict power = st->power;
981 register int adapted = st->adapted;
982 register spx_word32_t sum_adapt = st->sum_adapt;
983 register spx_word16_t leake = st->leak_estimate;
984 register int framesize = st->frame_size;
985 register int i;
986 register int M = st->M;
987
988 adapted = mux( !adapted && sum_adapt > QCONST32(M,15) &&
989 MULT16_32_Q15(leake,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy), 1, adapted);
990
991 if ( adapted )
992 { register spx_word32_t * restrict Yf = st->Yf;
993 register spx_word32_t * restrict Rf = st->Rf;
994 register spx_word32_t r, e, e2;
995
996 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
997 #pragma TCS_unroll=4
998 #pragma TCS_unrollexact=1
999 #endif
1000 for ( i=0 ; i<framesize ; ++i )
1001 {
1002 r = SHL32(Yf[i],3);
1003 r = MULT16_32_Q15(leake,r);
1004 e = SHL32(Rf[i],3)+1;
1005
1006 #ifdef FIXED_POINT
1007 e2 = SHR32(e,1);
1008 r = mux( r > e2, e2, r);
1009 #else
1010 e2 = e * .5;
1011 r = fmux( r > e2, e2, r);
1012 #endif
1013
1014 r = MULT16_32_Q15(QCONST16(.7,15),r) +
1015 MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1016
1017 power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[i]+10)),WEIGHT_SHIFT+16);
1018 }
1019 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
1020 #pragma TCS_unrollexact=0
1021 #pragma TCS_unroll=0
1022 #endif
1023
1024 r = SHL32(Yf[framesize],3);
1025 r = MULT16_32_Q15(leake,r);
1026 e = SHL32(Rf[framesize],3)+1;
1027
1028 #ifdef FIXED_POINT
1029 e2 = SHR32(e,1);
1030 r = mux( r > e2, e2, r);
1031 #else
1032 e2 = e * .5;
1033 r = fmux( r > e2, e2, r);
1034 #endif
1035
1036 r = MULT16_32_Q15(QCONST16(.7,15),r) +
1037 MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1038
1039 power_1[framesize] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[framesize]+10)),WEIGHT_SHIFT+16);
1040
1041 } else
1042 {
1043 register spx_word16_t adapt_rate=0;
1044 register int N = st->window_size;
1045
1046 if ( Sxx > SHR32(MULT16_16(N, 1000),6) )
1047 { register spx_word32_t tmp32, tmp32q;
1048
1049 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1050 #ifdef FIXED_POINT
1051 tmp32q = SHR32(See,2);
1052 tmp32 = mux(tmp32 > tmp32q, tmp32q, tmp32);
1053 #else
1054 tmp32q = 0.25 * See;
1055 tmp32 = fmux(tmp32 > tmp32q, tmp32q, tmp32);
1056 #endif
1057 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1058 }
1059
1060 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
1061 #pragma TCS_unroll=4
1062 #pragma TCS_unrollexact=1
1063 #endif
1064 for (i=0;i<framesize;i++)
1065 power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[i],10)),WEIGHT_SHIFT+1);
1066 #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
1067 #pragma TCS_unrollexact=0
1068 #pragma TCS_unroll=0
1069 #endif
1070 power_1[framesize] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[framesize],10)),WEIGHT_SHIFT+1);
1071 sum_adapt = ADD32(sum_adapt,adapt_rate);
1072 }
1073
1074 st->sum_adapt = sum_adapt;
1075 st->adapted = adapted;
1076 }
1077
1078 #define OVERRIDE_ECHO_CANCELLATION
speex_echo_cancellation(SpeexEchoState * restrict st,const spx_int16_t * restrict in,const spx_int16_t * restrict far_end,spx_int16_t * restrict out)1079 void speex_echo_cancellation(
1080 SpeexEchoState * restrict st,
1081 const spx_int16_t * restrict in,
1082 const spx_int16_t * restrict far_end,
1083 spx_int16_t * restrict out
1084 )
1085 {
1086 register int framesize = st->frame_size;
1087 register spx_word16_t * restrict x = st->x;
1088 register spx_word16_t * restrict xx = st->x + framesize;
1089 register spx_word16_t * restrict yy = st->y + framesize;
1090 register spx_word16_t * restrict ee = st->e + framesize;
1091 register spx_word32_t Syy, See, Sxx, Sdd, Sff;
1092 register spx_word16_t RER;
1093 register spx_word32_t Sey;
1094 register int j;
1095 register int N,M;
1096 #ifdef TWO_PATH
1097 register spx_word32_t Dbf;
1098 #endif
1099
1100 N = st->window_size;
1101 M = st->M;
1102 st->cancel_count++;
1103
1104 filter_dc_notch16(in, st->notch_radius, st->input, framesize, st->notch_mem);
1105 mdf_preemph(st, xx, far_end, framesize);
1106
1107 {
1108
1109 register spx_word16_t * restrict X = st->X;
1110
1111 for ( j=M-1 ; j>=0 ; j-- )
1112 { register spx_word16_t * restrict Xdes = &(X[(j+1)*N]);
1113 register spx_word16_t * restrict Xsrc = &(X[j*N]);
1114
1115 memcpy(Xdes, Xsrc, N * sizeof(spx_word16_t));
1116 }
1117
1118 spx_fft(st->fft_table, x, X);
1119 memcpy(st->last_y, st->x, N * sizeof(spx_word16_t));
1120 Sxx = mdf_inner_prod(xx, xx, framesize);
1121 memcpy(x, xx, framesize * sizeof(spx_word16_t));
1122
1123 #ifdef TWO_PATH
1124 spectral_mul_accum(st->X, st->foreground, st->Y, N, M);
1125 spx_ifft(st->fft_table, st->Y, st->e);
1126 mdf_sub(xx, st->input, ee, framesize);
1127 Sff = mdf_inner_prod(xx, xx, framesize);
1128 #endif
1129
1130 mdf_adjust_prop (st->W, N, M, st->prop);
1131
1132 if (st->saturated == 0)
1133 { mdf_compute_weight_gradient(st, X, N, M);
1134 } else
1135 { st->saturated--;
1136 }
1137 }
1138
1139 mdf_update_weight(st, N, M, framesize);
1140 spectral_mul_accum(st->X, st->W, st->Y, N, M);
1141 spx_ifft(st->fft_table, st->Y, st->y);
1142
1143 #ifdef TWO_PATH
1144 mdf_sub(xx, ee, yy, framesize);
1145 Dbf = 10+mdf_inner_prod(xx, xx, framesize);
1146 #endif
1147
1148 mdf_sub(xx, st->input, yy, framesize);
1149 See = mdf_inner_prod(xx, xx, framesize);
1150
1151 #ifndef TWO_PATH
1152 Sff = See;
1153 #else
1154 See = mdf_update_foreground(st,Dbf,Sff,See);
1155 #endif
1156
1157
1158 mdf_compute_error_signal(st, in, out, framesize);
1159 Sey = mdf_inner_prod(ee, yy, framesize);
1160 Syy = mdf_inner_prod(yy, yy, framesize);
1161 Sdd = mdf_inner_prod(st->input, st->input, framesize);
1162
1163 if ( mdf_check(st,out,Syy,Sxx,See,Sff,Sdd) >= 50 )
1164 { speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1165 speex_echo_state_reset(st);
1166 return;
1167 }
1168
1169 See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1170 spx_fft(st->fft_table, st->e, st->E);
1171 memset(st->y, 0, framesize * sizeof(spx_word16_t));
1172 spx_fft(st->fft_table, st->y, st->Y);
1173 power_spectrum(st->E, st->Rf, N);
1174 power_spectrum(st->Y, st->Yf, N);
1175 power_spectrum(st->X, st->Xf, N);
1176
1177 mdf_smooth(st->power,st->Xf,framesize, M);
1178 mdf_compute_filtered_spectra_crosscorrelations(st,Syy,See,framesize);
1179 RER = mdf_compute_RER(See,Syy,Sey,Sxx,st->leak_estimate);
1180 mdf_adapt(st, RER, Syy, See, Sxx);
1181
1182 if ( st->adapted )
1183 { register spx_word16_t * restrict last_yy = st->last_y + framesize;
1184
1185 memcpy(st->last_y, last_yy, framesize * sizeof(spx_word16_t));
1186 mdf_sub_int(last_yy, in, out, framesize);
1187
1188 }
1189 }
1190
1191
1192
1193