1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/aec3/residual_echo_estimator.h"
12
13 #include <stddef.h>
14
15 #include <algorithm>
16 #include <vector>
17
18 #include "api/array_view.h"
19 #include "modules/audio_processing/aec3/reverb_model.h"
20 #include "rtc_base/checks.h"
21 #include "system_wrappers/include/field_trial.h"
22
23 namespace webrtc {
24 namespace {
25
// Gain value handed out by GetTransparentModeGain().
constexpr float kDefaultTransparentModeGain = 0.01f;

// Returns the gain to use while the transparent mode is active. The value is
// currently fixed to kDefaultTransparentModeGain.
float GetTransparentModeGain() {
  const float transparent_mode_gain = kDefaultTransparentModeGain;
  return transparent_mode_gain;
}
31
GetEarlyReflectionsDefaultModeGain(const EchoCanceller3Config::EpStrength & config)32 float GetEarlyReflectionsDefaultModeGain(
33 const EchoCanceller3Config::EpStrength& config) {
34 if (field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain")) {
35 return 0.1f;
36 }
37 return config.default_gain;
38 }
39
GetLateReflectionsDefaultModeGain(const EchoCanceller3Config::EpStrength & config)40 float GetLateReflectionsDefaultModeGain(
41 const EchoCanceller3Config::EpStrength& config) {
42 if (field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain")) {
43 return 0.1f;
44 }
45 return config.default_gain;
46 }
47
UseErleOnsetCompensationInDominantNearend(const EchoCanceller3Config::EpStrength & config)48 bool UseErleOnsetCompensationInDominantNearend(
49 const EchoCanceller3Config::EpStrength& config) {
50 return config.erle_onset_compensation_in_dominant_nearend ||
51 field_trial::IsEnabled(
52 "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend");
53 }
54
55 // Computes the indexes that will be used for computing spectral power over
56 // the blocks surrounding the delay.
GetRenderIndexesToAnalyze(const SpectrumBuffer & spectrum_buffer,const EchoCanceller3Config::EchoModel & echo_model,int filter_delay_blocks,int * idx_start,int * idx_stop)57 void GetRenderIndexesToAnalyze(
58 const SpectrumBuffer& spectrum_buffer,
59 const EchoCanceller3Config::EchoModel& echo_model,
60 int filter_delay_blocks,
61 int* idx_start,
62 int* idx_stop) {
63 RTC_DCHECK(idx_start);
64 RTC_DCHECK(idx_stop);
65 size_t window_start;
66 size_t window_end;
67 window_start =
68 std::max(0, filter_delay_blocks -
69 static_cast<int>(echo_model.render_pre_window_size));
70 window_end = filter_delay_blocks +
71 static_cast<int>(echo_model.render_post_window_size);
72 *idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start);
73 *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
74 }
75
76 // Estimates the residual echo power based on the echo return loss enhancement
77 // (ERLE) and the linear power estimate.
LinearEstimate(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)78 void LinearEstimate(
79 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
80 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
81 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
82 RTC_DCHECK_EQ(S2_linear.size(), erle.size());
83 RTC_DCHECK_EQ(S2_linear.size(), R2.size());
84
85 const size_t num_capture_channels = R2.size();
86 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
87 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
88 RTC_DCHECK_LT(0.f, erle[ch][k]);
89 R2[ch][k] = S2_linear[ch][k] / erle[ch][k];
90 }
91 }
92 }
93
94 // Estimates the residual echo power based on the estimate of the echo path
95 // gain.
NonLinearEstimate(float echo_path_gain,const std::array<float,kFftLengthBy2Plus1> & X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)96 void NonLinearEstimate(
97 float echo_path_gain,
98 const std::array<float, kFftLengthBy2Plus1>& X2,
99 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
100 const size_t num_capture_channels = R2.size();
101 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
102 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
103 R2[ch][k] = X2[k] * echo_path_gain;
104 }
105 }
106 }
107
108 // Applies a soft noise gate to the echo generating power.
ApplyNoiseGate(const EchoCanceller3Config::EchoModel & config,rtc::ArrayView<float,kFftLengthBy2Plus1> X2)109 void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config,
110 rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
111 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
112 if (config.noise_gate_power > X2[k]) {
113 X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope *
114 (config.noise_gate_power - X2[k]));
115 }
116 }
117 }
118
119 // Estimates the echo generating signal power as gated maximal power over a
120 // time window.
EchoGeneratingPower(size_t num_render_channels,const SpectrumBuffer & spectrum_buffer,const EchoCanceller3Config::EchoModel & echo_model,int filter_delay_blocks,rtc::ArrayView<float,kFftLengthBy2Plus1> X2)121 void EchoGeneratingPower(size_t num_render_channels,
122 const SpectrumBuffer& spectrum_buffer,
123 const EchoCanceller3Config::EchoModel& echo_model,
124 int filter_delay_blocks,
125 rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
126 int idx_stop;
127 int idx_start;
128 GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks,
129 &idx_start, &idx_stop);
130
131 std::fill(X2.begin(), X2.end(), 0.f);
132 if (num_render_channels == 1) {
133 for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
134 for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
135 X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]);
136 }
137 }
138 } else {
139 for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
140 std::array<float, kFftLengthBy2Plus1> render_power;
141 render_power.fill(0.f);
142 for (size_t ch = 0; ch < num_render_channels; ++ch) {
143 const auto& channel_power = spectrum_buffer.buffer[k][ch];
144 for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
145 render_power[j] += channel_power[j];
146 }
147 }
148 for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
149 X2[j] = std::max(X2[j], render_power[j]);
150 }
151 }
152 }
153 }
154
155 } // namespace
156
// Constructs the estimator from the supplied configuration and the number of
// render channels. The transparent mode gains, the general (default mode)
// gains and the ERLE onset compensation setting are resolved once here via the
// file-local helper functions, after which the estimator state is initialized
// through Reset().
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
                                             size_t num_render_channels)
    : config_(config),
      num_render_channels_(num_render_channels),
      early_reflections_transparent_mode_gain_(GetTransparentModeGain()),
      late_reflections_transparent_mode_gain_(GetTransparentModeGain()),
      // NOTE(review): the initializers below read the `config_` member rather
      // than the `config` parameter, which presumably relies on `config_`
      // being declared before these members in the header - confirm.
      early_reflections_general_gain_(
          GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
      late_reflections_general_gain_(
          GetLateReflectionsDefaultModeGain(config_.ep_strength)),
      erle_onset_compensation_in_dominant_nearend_(
          UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) {
  Reset();
}
171
// Defaulted destructor, defined out of line in this translation unit.
ResidualEchoEstimator::~ResidualEchoEstimator() = default;
173
Estimate(const AecState & aec_state,const RenderBuffer & render_buffer,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> Y2,bool dominant_nearend,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2_unbounded)174 void ResidualEchoEstimator::Estimate(
175 const AecState& aec_state,
176 const RenderBuffer& render_buffer,
177 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
178 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
179 bool dominant_nearend,
180 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
181 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) {
182 RTC_DCHECK_EQ(R2.size(), Y2.size());
183 RTC_DCHECK_EQ(R2.size(), S2_linear.size());
184
185 const size_t num_capture_channels = R2.size();
186
187 // Estimate the power of the stationary noise in the render signal.
188 UpdateRenderNoisePower(render_buffer);
189
190 // Estimate the residual echo power.
191 if (aec_state.UsableLinearEstimate()) {
192 // When there is saturated echo, assume the same spectral content as is
193 // present in the microphone signal.
194 if (aec_state.SaturatedEcho()) {
195 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
196 std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
197 std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
198 }
199 } else {
200 const bool onset_compensated =
201 erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend;
202 LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2);
203 LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded);
204 }
205
206 UpdateReverb(ReverbType::kLinear, aec_state, render_buffer,
207 dominant_nearend);
208 AddReverb(R2);
209 AddReverb(R2_unbounded);
210 } else {
211 const float echo_path_gain =
212 GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);
213
214 // When there is saturated echo, assume the same spectral content as is
215 // present in the microphone signal.
216 if (aec_state.SaturatedEcho()) {
217 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
218 std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
219 std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
220 }
221 } else {
222 // Estimate the echo generating signal power.
223 std::array<float, kFftLengthBy2Plus1> X2;
224 EchoGeneratingPower(num_render_channels_,
225 render_buffer.GetSpectrumBuffer(), config_.echo_model,
226 aec_state.MinDirectPathFilterDelay(), X2);
227 if (!aec_state.UseStationarityProperties()) {
228 ApplyNoiseGate(config_.echo_model, X2);
229 }
230
231 // Subtract the stationary noise power to avoid stationary noise causing
232 // excessive echo suppression.
233 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
234 X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k];
235 X2[k] = std::max(0.f, X2[k]);
236 }
237
238 NonLinearEstimate(echo_path_gain, X2, R2);
239 NonLinearEstimate(echo_path_gain, X2, R2_unbounded);
240 }
241
242 if (config_.echo_model.model_reverb_in_nonlinear_mode &&
243 !aec_state.TransparentModeActive()) {
244 UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer,
245 dominant_nearend);
246 AddReverb(R2);
247 AddReverb(R2_unbounded);
248 }
249 }
250
251 if (aec_state.UseStationarityProperties()) {
252 // Scale the echo according to echo audibility.
253 std::array<float, kFftLengthBy2Plus1> residual_scaling;
254 aec_state.GetResidualEchoScaling(residual_scaling);
255 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
256 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
257 R2[ch][k] *= residual_scaling[k];
258 R2_unbounded[ch][k] *= residual_scaling[k];
259 }
260 }
261 }
262 }
263
Reset()264 void ResidualEchoEstimator::Reset() {
265 echo_reverb_.Reset();
266 X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
267 X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
268 }
269
UpdateRenderNoisePower(const RenderBuffer & render_buffer)270 void ResidualEchoEstimator::UpdateRenderNoisePower(
271 const RenderBuffer& render_buffer) {
272 std::array<float, kFftLengthBy2Plus1> render_power_data;
273 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
274 render_buffer.Spectrum(0);
275 rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
276 X2[/*channel=*/0];
277 if (num_render_channels_ > 1) {
278 render_power_data.fill(0.f);
279 for (size_t ch = 0; ch < num_render_channels_; ++ch) {
280 const auto& channel_power = X2[ch];
281 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
282 render_power_data[k] += channel_power[k];
283 }
284 }
285 render_power = render_power_data;
286 }
287
288 // Estimate the stationary noise power in a minimum statistics manner.
289 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
290 // Decrease rapidly.
291 if (render_power[k] < X2_noise_floor_[k]) {
292 X2_noise_floor_[k] = render_power[k];
293 X2_noise_floor_counter_[k] = 0;
294 } else {
295 // Increase in a delayed, leaky manner.
296 if (X2_noise_floor_counter_[k] >=
297 static_cast<int>(config_.echo_model.noise_floor_hold)) {
298 X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f,
299 config_.echo_model.min_noise_floor_power);
300 } else {
301 ++X2_noise_floor_counter_[k];
302 }
303 }
304 }
305 }
306
307 // Updates the reverb estimation.
UpdateReverb(ReverbType reverb_type,const AecState & aec_state,const RenderBuffer & render_buffer,bool dominant_nearend)308 void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type,
309 const AecState& aec_state,
310 const RenderBuffer& render_buffer,
311 bool dominant_nearend) {
312 // Choose reverb partition based on what type of echo power model is used.
313 const size_t first_reverb_partition =
314 reverb_type == ReverbType::kLinear
315 ? aec_state.FilterLengthBlocks() + 1
316 : aec_state.MinDirectPathFilterDelay() + 1;
317
318 // Compute render power for the reverb.
319 std::array<float, kFftLengthBy2Plus1> render_power_data;
320 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
321 render_buffer.Spectrum(first_reverb_partition);
322 rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
323 X2[/*channel=*/0];
324 if (num_render_channels_ > 1) {
325 render_power_data.fill(0.f);
326 for (size_t ch = 0; ch < num_render_channels_; ++ch) {
327 const auto& channel_power = X2[ch];
328 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
329 render_power_data[k] += channel_power[k];
330 }
331 }
332 render_power = render_power_data;
333 }
334
335 // Update the reverb estimate.
336 float reverb_decay = aec_state.ReverbDecay(/*mild=*/dominant_nearend);
337 if (reverb_type == ReverbType::kLinear) {
338 echo_reverb_.UpdateReverb(
339 render_power, aec_state.GetReverbFrequencyResponse(), reverb_decay);
340 } else {
341 const float echo_path_gain =
342 GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false);
343 echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
344 reverb_decay);
345 }
346 }
347 // Adds the estimated power of the reverb to the residual echo power.
AddReverb(rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2) const348 void ResidualEchoEstimator::AddReverb(
349 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const {
350 const size_t num_capture_channels = R2.size();
351
352 // Add the reverb power.
353 rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
354 echo_reverb_.reverb();
355 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
356 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
357 R2[ch][k] += reverb_power[k];
358 }
359 }
360 }
361
362 // Chooses the echo path gain to use.
GetEchoPathGain(const AecState & aec_state,bool gain_for_early_reflections) const363 float ResidualEchoEstimator::GetEchoPathGain(
364 const AecState& aec_state,
365 bool gain_for_early_reflections) const {
366 float gain_amplitude;
367 if (aec_state.TransparentModeActive()) {
368 gain_amplitude = gain_for_early_reflections
369 ? early_reflections_transparent_mode_gain_
370 : late_reflections_transparent_mode_gain_;
371 } else {
372 gain_amplitude = gain_for_early_reflections
373 ? early_reflections_general_gain_
374 : late_reflections_general_gain_;
375 }
376 return gain_amplitude * gain_amplitude;
377 }
378
379 } // namespace webrtc
380