xref: /aosp_15_r20/external/webrtc/modules/audio_processing/aecm/aecm_core_c.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stddef.h>
12 #include <stdlib.h>
13 
14 #include "modules/audio_processing/aecm/aecm_core.h"
15 
16 extern "C" {
17 #include "common_audio/ring_buffer.h"
18 #include "common_audio/signal_processing/include/real_fft.h"
19 }
20 #include "modules/audio_processing/aecm/echo_control_mobile.h"
21 #include "modules/audio_processing/utility/delay_estimator_wrapper.h"
22 extern "C" {
23 #include "system_wrappers/include/cpu_features_wrapper.h"
24 }
25 
26 #include "rtc_base/checks.h"
27 #include "rtc_base/numerics/safe_conversions.h"
28 #include "rtc_base/sanitizer.h"
29 
30 namespace webrtc {
31 
32 namespace {
33 
34 // Square root of Hanning window in Q14.
35 static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
36     0,     399,   798,   1196,  1594,  1990,  2386,  2780,  3172,  3562,  3951,
37     4337,  4720,  5101,  5478,  5853,  6224,  6591,  6954,  7313,  7668,  8019,
38     8364,  8705,  9040,  9370,  9695,  10013, 10326, 10633, 10933, 11227, 11514,
39     11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189,
40     14384, 14571, 14749, 14918, 15079, 15231, 15373, 15506, 15631, 15746, 15851,
41     15947, 16034, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384};
42 
43 #ifdef AECM_WITH_ABS_APPROX
44 // Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
45 static const uint16_t kAlpha1 = 32584;
46 // Q15 beta = 0.12967166976970   const Factor for magnitude approximation
47 static const uint16_t kBeta1 = 4249;
48 // Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
49 static const uint16_t kAlpha2 = 30879;
50 // Q15 beta = 0.33787806009150   const Factor for magnitude approximation
51 static const uint16_t kBeta2 = 11072;
52 // Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
53 static const uint16_t kAlpha3 = 26951;
54 // Q15 beta = 0.57762063060713   const Factor for magnitude approximation
55 static const uint16_t kBeta3 = 18927;
56 #endif
57 
58 static const int16_t kNoiseEstQDomain = 15;
59 static const int16_t kNoiseEstIncCount = 5;
60 
ComfortNoise(AecmCore * aecm,const uint16_t * dfa,ComplexInt16 * out,const int16_t * lambda)61 static void ComfortNoise(AecmCore* aecm,
62                          const uint16_t* dfa,
63                          ComplexInt16* out,
64                          const int16_t* lambda) {
65   int16_t i;
66   int16_t tmp16;
67   int32_t tmp32;
68 
69   int16_t randW16[PART_LEN];
70   int16_t uReal[PART_LEN1];
71   int16_t uImag[PART_LEN1];
72   int32_t outLShift32;
73   int16_t noiseRShift16[PART_LEN1];
74 
75   int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
76   int16_t minTrackShift;
77 
78   RTC_DCHECK_GE(shiftFromNearToNoise, 0);
79   RTC_DCHECK_LT(shiftFromNearToNoise, 16);
80 
81   if (aecm->noiseEstCtr < 100) {
82     // Track the minimum more quickly initially.
83     aecm->noiseEstCtr++;
84     minTrackShift = 6;
85   } else {
86     minTrackShift = 9;
87   }
88 
89   // Estimate noise power.
90   for (i = 0; i < PART_LEN1; i++) {
91     // Shift to the noise domain.
92     tmp32 = (int32_t)dfa[i];
93     outLShift32 = tmp32 << shiftFromNearToNoise;
94 
95     if (outLShift32 < aecm->noiseEst[i]) {
96       // Reset "too low" counter
97       aecm->noiseEstTooLowCtr[i] = 0;
98       // Track the minimum.
99       if (aecm->noiseEst[i] < (1 << minTrackShift)) {
100         // For small values, decrease noiseEst[i] every
101         // `kNoiseEstIncCount` block. The regular approach below can not
102         // go further down due to truncation.
103         aecm->noiseEstTooHighCtr[i]++;
104         if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
105           aecm->noiseEst[i]--;
106           aecm->noiseEstTooHighCtr[i] = 0;  // Reset the counter
107         }
108       } else {
109         aecm->noiseEst[i] -=
110             ((aecm->noiseEst[i] - outLShift32) >> minTrackShift);
111       }
112     } else {
113       // Reset "too high" counter
114       aecm->noiseEstTooHighCtr[i] = 0;
115       // Ramp slowly upwards until we hit the minimum again.
116       if ((aecm->noiseEst[i] >> 19) > 0) {
117         // Avoid overflow.
118         // Multiplication with 2049 will cause wrap around. Scale
119         // down first and then multiply
120         aecm->noiseEst[i] >>= 11;
121         aecm->noiseEst[i] *= 2049;
122       } else if ((aecm->noiseEst[i] >> 11) > 0) {
123         // Large enough for relative increase
124         aecm->noiseEst[i] *= 2049;
125         aecm->noiseEst[i] >>= 11;
126       } else {
127         // Make incremental increases based on size every
128         // `kNoiseEstIncCount` block
129         aecm->noiseEstTooLowCtr[i]++;
130         if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {
131           aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
132           aecm->noiseEstTooLowCtr[i] = 0;  // Reset counter
133         }
134       }
135     }
136   }
137 
138   for (i = 0; i < PART_LEN1; i++) {
139     tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
140     if (tmp32 > 32767) {
141       tmp32 = 32767;
142       aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
143     }
144     noiseRShift16[i] = (int16_t)tmp32;
145 
146     tmp16 = ONE_Q14 - lambda[i];
147     noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
148   }
149 
150   // Generate a uniform random array on [0 2^15-1].
151   WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
152 
153   // Generate noise according to estimated energy.
154   uReal[0] = 0;  // Reject LF noise.
155   uImag[0] = 0;
156   for (i = 1; i < PART_LEN1; i++) {
157     // Get a random index for the cos and sin tables over [0 359].
158     tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
159 
160     // Tables are in Q13.
161     uReal[i] =
162         (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> 13);
163     uImag[i] =
164         (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> 13);
165   }
166   uImag[PART_LEN] = 0;
167 
168   for (i = 0; i < PART_LEN1; i++) {
169     out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
170     out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
171   }
172 }
173 
WindowAndFFT(AecmCore * aecm,int16_t * fft,const int16_t * time_signal,ComplexInt16 * freq_signal,int time_signal_scaling)174 static void WindowAndFFT(AecmCore* aecm,
175                          int16_t* fft,
176                          const int16_t* time_signal,
177                          ComplexInt16* freq_signal,
178                          int time_signal_scaling) {
179   int i = 0;
180 
181   // FFT of signal
182   for (i = 0; i < PART_LEN; i++) {
183     // Window time domain signal and insert into real part of
184     // transformation array `fft`
185     int16_t scaled_time_signal = time_signal[i] * (1 << time_signal_scaling);
186     fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
187     scaled_time_signal = time_signal[i + PART_LEN] * (1 << time_signal_scaling);
188     fft[PART_LEN + i] = (int16_t)(
189         (scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
190   }
191 
192   // Do forward FFT, then take only the first PART_LEN complex samples,
193   // and change signs of the imaginary parts.
194   WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
195   for (i = 0; i < PART_LEN; i++) {
196     freq_signal[i].imag = -freq_signal[i].imag;
197   }
198 }
199 
InverseFFTAndWindow(AecmCore * aecm,int16_t * fft,ComplexInt16 * efw,int16_t * output,const int16_t * nearendClean)200 static void InverseFFTAndWindow(AecmCore* aecm,
201                                 int16_t* fft,
202                                 ComplexInt16* efw,
203                                 int16_t* output,
204                                 const int16_t* nearendClean) {
205   int i, j, outCFFT;
206   int32_t tmp32no1;
207   // Reuse `efw` for the inverse FFT output after transferring
208   // the contents to `fft`.
209   int16_t* ifft_out = (int16_t*)efw;
210 
211   // Synthesis
212   for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
213     fft[j] = efw[i].real;
214     fft[j + 1] = -efw[i].imag;
215   }
216   fft[0] = efw[0].real;
217   fft[1] = -efw[0].imag;
218 
219   fft[PART_LEN2] = efw[PART_LEN].real;
220   fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
221 
222   // Inverse FFT. Keep outCFFT to scale the samples in the next block.
223   outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
224   for (i = 0; i < PART_LEN; i++) {
225     ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
226         ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
227     tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
228                                     outCFFT - aecm->dfaCleanQDomain);
229     output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
230                                         tmp32no1 + aecm->outBuf[i],
231                                         WEBRTC_SPL_WORD16_MIN);
232 
233     tmp32no1 =
234         (ifft_out[PART_LEN + i] * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14;
235     tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, outCFFT - aecm->dfaCleanQDomain);
236     aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1,
237                                               WEBRTC_SPL_WORD16_MIN);
238   }
239 
240   // Copy the current block to the old position
241   // (aecm->outBuf is shifted elsewhere)
242   memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
243   memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN,
244          sizeof(int16_t) * PART_LEN);
245   if (nearendClean != NULL) {
246     memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN,
247            sizeof(int16_t) * PART_LEN);
248   }
249 }
250 
251 // Transforms a time domain signal into the frequency domain, outputting the
252 // complex valued signal, absolute value and sum of absolute values.
253 //
254 // time_signal          [in]    Pointer to time domain signal
255 // freq_signal_real     [out]   Pointer to real part of frequency domain array
256 // freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
257 //                              array
258 // freq_signal_abs      [out]   Pointer to absolute value of frequency domain
259 //                              array
260 // freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
261 //                              the frequency domain array
262 // return value                 The Q-domain of current frequency values
263 //
TimeToFrequencyDomain(AecmCore * aecm,const int16_t * time_signal,ComplexInt16 * freq_signal,uint16_t * freq_signal_abs,uint32_t * freq_signal_sum_abs)264 static int TimeToFrequencyDomain(AecmCore* aecm,
265                                  const int16_t* time_signal,
266                                  ComplexInt16* freq_signal,
267                                  uint16_t* freq_signal_abs,
268                                  uint32_t* freq_signal_sum_abs) {
269   int i = 0;
270   int time_signal_scaling = 0;
271 
272   int32_t tmp32no1 = 0;
273   int32_t tmp32no2 = 0;
274 
275   // In fft_buf, +16 for 32-byte alignment.
276   int16_t fft_buf[PART_LEN4 + 16];
277   int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31);
278 
279   int16_t tmp16no1;
280 #ifndef WEBRTC_ARCH_ARM_V7
281   int16_t tmp16no2;
282 #endif
283 #ifdef AECM_WITH_ABS_APPROX
284   int16_t max_value = 0;
285   int16_t min_value = 0;
286   uint16_t alpha = 0;
287   uint16_t beta = 0;
288 #endif
289 
290 #ifdef AECM_DYNAMIC_Q
291   tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
292   time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
293 #endif
294 
295   WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
296 
297   // Extract imaginary and real part, calculate the magnitude for
298   // all frequency bins
299   freq_signal[0].imag = 0;
300   freq_signal[PART_LEN].imag = 0;
301   freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
302   freq_signal_abs[PART_LEN] =
303       (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[PART_LEN].real);
304   (*freq_signal_sum_abs) =
305       (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]);
306 
307   for (i = 1; i < PART_LEN; i++) {
308     if (freq_signal[i].real == 0) {
309       freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
310     } else if (freq_signal[i].imag == 0) {
311       freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
312     } else {
313       // Approximation for magnitude of complex fft output
314       // magn = sqrt(real^2 + imag^2)
315       // magn ~= alpha * max(`imag`,`real`) + beta * min(`imag`,`real`)
316       //
317       // The parameters alpha and beta are stored in Q15
318 
319 #ifdef AECM_WITH_ABS_APPROX
320       tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
321       tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
322 
323       if (tmp16no1 > tmp16no2) {
324         max_value = tmp16no1;
325         min_value = tmp16no2;
326       } else {
327         max_value = tmp16no2;
328         min_value = tmp16no1;
329       }
330 
331       // Magnitude in Q(-6)
332       if ((max_value >> 2) > min_value) {
333         alpha = kAlpha1;
334         beta = kBeta1;
335       } else if ((max_value >> 1) > min_value) {
336         alpha = kAlpha2;
337         beta = kBeta2;
338       } else {
339         alpha = kAlpha3;
340         beta = kBeta3;
341       }
342       tmp16no1 = (int16_t)((max_value * alpha) >> 15);
343       tmp16no2 = (int16_t)((min_value * beta) >> 15);
344       freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
345 #else
346 #ifdef WEBRTC_ARCH_ARM_V7
347       __asm __volatile(
348           "smulbb %[tmp32no1], %[real], %[real]\n\t"
349           "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
350           : [tmp32no1] "+&r"(tmp32no1), [tmp32no2] "=r"(tmp32no2)
351           : [real] "r"(freq_signal[i].real), [imag] "r"(freq_signal[i].imag));
352 #else
353       tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
354       tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
355       tmp32no1 = tmp16no1 * tmp16no1;
356       tmp32no2 = tmp16no2 * tmp16no2;
357       tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
358 #endif  // WEBRTC_ARCH_ARM_V7
359       tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
360 
361       freq_signal_abs[i] = (uint16_t)tmp32no1;
362 #endif  // AECM_WITH_ABS_APPROX
363     }
364     (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
365   }
366 
367   return time_signal_scaling;
368 }
369 
370 }  // namespace
371 
372 int RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/8200
WebRtcAecm_ProcessBlock(AecmCore * aecm,const int16_t * farend,const int16_t * nearendNoisy,const int16_t * nearendClean,int16_t * output)373     WebRtcAecm_ProcessBlock(AecmCore* aecm,
374                             const int16_t* farend,
375                             const int16_t* nearendNoisy,
376                             const int16_t* nearendClean,
377                             int16_t* output) {
378   int i;
379 
380   uint32_t xfaSum;
381   uint32_t dfaNoisySum;
382   uint32_t dfaCleanSum;
383   uint32_t echoEst32Gained;
384   uint32_t tmpU32;
385 
386   int32_t tmp32no1;
387 
388   uint16_t xfa[PART_LEN1];
389   uint16_t dfaNoisy[PART_LEN1];
390   uint16_t dfaClean[PART_LEN1];
391   uint16_t* ptrDfaClean = dfaClean;
392   const uint16_t* far_spectrum_ptr = NULL;
393 
394   // 32 byte aligned buffers (with +8 or +16).
395   // TODO(kma): define fft with ComplexInt16.
396   int16_t fft_buf[PART_LEN4 + 2 + 16];  // +2 to make a loop safe.
397   int32_t echoEst32_buf[PART_LEN1 + 8];
398   int32_t dfw_buf[PART_LEN2 + 8];
399   int32_t efw_buf[PART_LEN2 + 8];
400 
401   int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31);
402   int32_t* echoEst32 = (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~31);
403   ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
404   ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
405 
406   int16_t hnl[PART_LEN1];
407   int16_t numPosCoef = 0;
408   int16_t nlpGain = ONE_Q14;
409   int delay;
410   int16_t tmp16no1;
411   int16_t tmp16no2;
412   int16_t mu;
413   int16_t supGain;
414   int16_t zeros32, zeros16;
415   int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
416   int far_q;
417   int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
418 
419   const int kMinPrefBand = 4;
420   const int kMaxPrefBand = 24;
421   int32_t avgHnl32 = 0;
422 
423   // Determine startup state. There are three states:
424   // (0) the first CONV_LEN blocks
425   // (1) another CONV_LEN blocks
426   // (2) the rest
427 
428   if (aecm->startupState < 2) {
429     aecm->startupState =
430         (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2);
431   }
432   // END: Determine startup state
433 
434   // Buffer near and far end signals
435   memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
436   memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
437   if (nearendClean != NULL) {
438     memcpy(aecm->dBufClean + PART_LEN, nearendClean,
439            sizeof(int16_t) * PART_LEN);
440   }
441 
442   // Transform far end signal from time domain to frequency domain.
443   far_q = TimeToFrequencyDomain(aecm, aecm->xBuf, dfw, xfa, &xfaSum);
444 
445   // Transform noisy near end signal from time domain to frequency domain.
446   zerosDBufNoisy =
447       TimeToFrequencyDomain(aecm, aecm->dBufNoisy, dfw, dfaNoisy, &dfaNoisySum);
448   aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
449   aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
450 
451   if (nearendClean == NULL) {
452     ptrDfaClean = dfaNoisy;
453     aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
454     aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
455     dfaCleanSum = dfaNoisySum;
456   } else {
457     // Transform clean near end signal from time domain to frequency domain.
458     zerosDBufClean = TimeToFrequencyDomain(aecm, aecm->dBufClean, dfw, dfaClean,
459                                            &dfaCleanSum);
460     aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
461     aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
462   }
463 
464   // Get the delay
465   // Save far-end history and estimate delay
466   WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
467   if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
468                                far_q) == -1) {
469     return -1;
470   }
471   delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, dfaNoisy,
472                                           PART_LEN1, zerosDBufNoisy);
473   if (delay == -1) {
474     return -1;
475   } else if (delay == -2) {
476     // If the delay is unknown, we assume zero.
477     // NOTE: this will have to be adjusted if we ever add lookahead.
478     delay = 0;
479   }
480 
481   if (aecm->fixedDelay >= 0) {
482     // Use fixed delay
483     delay = aecm->fixedDelay;
484   }
485 
486   // Get aligned far end spectrum
487   far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
488   zerosXBuf = (int16_t)far_q;
489   if (far_spectrum_ptr == NULL) {
490     return -1;
491   }
492 
493   // Calculate log(energy) and update energy threshold levels
494   WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum,
495                           echoEst32);
496 
497   // Calculate stepsize
498   mu = WebRtcAecm_CalcStepSize(aecm);
499 
500   // Update counters
501   aecm->totCount++;
502 
503   // This is the channel estimation algorithm.
504   // It is base on NLMS but has a variable step length,
505   // which was calculated above.
506   WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu,
507                            echoEst32);
508   supGain = WebRtcAecm_CalcSuppressionGain(aecm);
509 
510   // Calculate Wiener filter hnl[]
511   for (i = 0; i < PART_LEN1; i++) {
512     // Far end signal through channel estimate in Q8
513     // How much can we shift right to preserve resolution
514     tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
515     aecm->echoFilt[i] +=
516         rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8);
517 
518     zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
519     zeros16 = WebRtcSpl_NormW16(supGain) + 1;
520     if (zeros32 + zeros16 > 16) {
521       // Multiplication is safe
522       // Result in
523       // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
524       //   aecm->xfaQDomainBuf[diff])
525       echoEst32Gained =
526           WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], (uint16_t)supGain);
527       resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
528       resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
529     } else {
530       tmp16no1 = 17 - zeros32 - zeros16;
531       resolutionDiff =
532           14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
533       resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
534       if (zeros32 > tmp16no1) {
535         echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
536                                                 supGain >> tmp16no1);
537       } else {
538         // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
539         echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
540       }
541     }
542 
543     zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
544     RTC_DCHECK_GE(zeros16, 0);  // `zeros16` is a norm, hence non-negative.
545     dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
546     if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
547       tmp16no1 = aecm->nearFilt[i] * (1 << zeros16);
548       qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
549       tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
550     } else {
551       tmp16no1 = dfa_clean_q_domain_diff < 0
552                      ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
553                      : aecm->nearFilt[i] * (1 << dfa_clean_q_domain_diff);
554       qDomainDiff = 0;
555       tmp16no2 = ptrDfaClean[i];
556     }
557     tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
558     tmp16no2 = (int16_t)(tmp32no1 >> 4);
559     tmp16no2 += tmp16no1;
560     zeros16 = WebRtcSpl_NormW16(tmp16no2);
561     if ((tmp16no2) & (-qDomainDiff > zeros16)) {
562       aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
563     } else {
564       aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 * (1 << -qDomainDiff)
565                                           : tmp16no2 >> qDomainDiff;
566     }
567 
568     // Wiener filter coefficients, resulting hnl in Q14
569     if (echoEst32Gained == 0) {
570       hnl[i] = ONE_Q14;
571     } else if (aecm->nearFilt[i] == 0) {
572       hnl[i] = 0;
573     } else {
574       // Multiply the suppression gain
575       // Rounding
576       echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
577       tmpU32 =
578           WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
579 
580       // Current resolution is
581       // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
582       // Make sure we are in Q14
583       tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
584       if (tmp32no1 > ONE_Q14) {
585         hnl[i] = 0;
586       } else if (tmp32no1 < 0) {
587         hnl[i] = ONE_Q14;
588       } else {
589         // 1-echoEst/dfa
590         hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
591         if (hnl[i] < 0) {
592           hnl[i] = 0;
593         }
594       }
595     }
596     if (hnl[i]) {
597       numPosCoef++;
598     }
599   }
600   // Only in wideband. Prevent the gain in upper band from being larger than
601   // in lower band.
602   if (aecm->mult == 2) {
603     // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
604     //               speech distortion in double-talk.
605     for (i = 0; i < PART_LEN1; i++) {
606       hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
607     }
608 
609     for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
610       avgHnl32 += (int32_t)hnl[i];
611     }
612     RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0);
613     avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
614 
615     for (i = kMaxPrefBand; i < PART_LEN1; i++) {
616       if (hnl[i] > (int16_t)avgHnl32) {
617         hnl[i] = (int16_t)avgHnl32;
618       }
619     }
620   }
621 
622   // Calculate NLP gain, result is in Q14
623   if (aecm->nlpFlag) {
624     for (i = 0; i < PART_LEN1; i++) {
625       // Truncate values close to zero and one.
626       if (hnl[i] > NLP_COMP_HIGH) {
627         hnl[i] = ONE_Q14;
628       } else if (hnl[i] < NLP_COMP_LOW) {
629         hnl[i] = 0;
630       }
631 
632       // Remove outliers
633       if (numPosCoef < 3) {
634         nlpGain = 0;
635       } else {
636         nlpGain = ONE_Q14;
637       }
638 
639       // NLP
640       if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) {
641         hnl[i] = ONE_Q14;
642       } else {
643         hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
644       }
645 
646       // multiply with Wiener coefficients
647       efw[i].real = (int16_t)(
648           WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14));
649       efw[i].imag = (int16_t)(
650           WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14));
651     }
652   } else {
653     // multiply with Wiener coefficients
654     for (i = 0; i < PART_LEN1; i++) {
655       efw[i].real = (int16_t)(
656           WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14));
657       efw[i].imag = (int16_t)(
658           WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14));
659     }
660   }
661 
662   if (aecm->cngMode == AecmTrue) {
663     ComfortNoise(aecm, ptrDfaClean, efw, hnl);
664   }
665 
666   InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
667 
668   return 0;
669 }
670 
671 }  // namespace webrtc
672