1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/speech_level_estimator.h"
12
13 #include <memory>
14
15 #include "modules/audio_processing/agc2/agc2_common.h"
16 #include "modules/audio_processing/include/audio_processing.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/gunit.h"
19
20 namespace webrtc {
21 namespace {
22
23 using AdaptiveDigitalConfig =
24 AudioProcessing::Config::GainController2::AdaptiveDigital;
25
26 // Number of speech frames that the level estimator must observe in order to
27 // become confident about the estimated level.
28 constexpr int kNumFramesToConfidence =
29 kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
30 static_assert(kNumFramesToConfidence > 0, "");
31
32 constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
33
34 // Provides the `vad_level` value `num_iterations` times to `level_estimator`.
RunOnConstantLevel(int num_iterations,float rms_dbfs,float peak_dbfs,float speech_probability,SpeechLevelEstimator & level_estimator)35 void RunOnConstantLevel(int num_iterations,
36 float rms_dbfs,
37 float peak_dbfs,
38 float speech_probability,
39 SpeechLevelEstimator& level_estimator) {
40 for (int i = 0; i < num_iterations; ++i) {
41 level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
42 }
43 }
44
GetAdaptiveDigitalConfig(int adjacent_speech_frames_threshold)45 constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
46 int adjacent_speech_frames_threshold) {
47 AdaptiveDigitalConfig config;
48 config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold;
49 return config;
50 }
51
52 constexpr float kNoSpeechProbability = 0.0f;
53 constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
54 constexpr float kMaxSpeechProbability = 1.0f;
55
56 // Level estimator with data dumper.
57 struct TestLevelEstimator {
TestLevelEstimatorwebrtc::__anonddce31470111::TestLevelEstimator58 explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
59 : data_dumper(0),
60 estimator(std::make_unique<SpeechLevelEstimator>(
61 &data_dumper,
62 GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
63 initial_speech_level_dbfs(estimator->level_dbfs()),
64 level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
65 level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {
66 RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs);
67 RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs);
68 RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f)
69 << "Adjust `level_rms_dbfs` so that the difference from the initial "
70 "level is wide enough for the tests";
71 }
72 ApmDataDumper data_dumper;
73 std::unique_ptr<SpeechLevelEstimator> estimator;
74 const float initial_speech_level_dbfs;
75 const float level_rms_dbfs;
76 const float level_peak_dbfs;
77 };
78
79 // Checks that the level estimator converges to a constant input speech level.
TEST(GainController2SpeechLevelEstimator,LevelStabilizes)80 TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
81 TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
82 RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
83 level_estimator.level_rms_dbfs,
84 level_estimator.level_peak_dbfs, kMaxSpeechProbability,
85 *level_estimator.estimator);
86 const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
87 RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs,
88 level_estimator.level_peak_dbfs, kMaxSpeechProbability,
89 *level_estimator.estimator);
90 EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
91 0.1f);
92 }
93
94 // Checks that the level controller does not become confident when too few
95 // speech frames are observed.
TEST(GainController2SpeechLevelEstimator,IsNotConfident)96 TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
97 TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
98 RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
99 level_estimator.level_rms_dbfs,
100 level_estimator.level_peak_dbfs, kMaxSpeechProbability,
101 *level_estimator.estimator);
102 EXPECT_FALSE(level_estimator.estimator->IsConfident());
103 }
104
105 // Checks that the level controller becomes confident when enough speech frames
106 // are observed.
TEST(GainController2SpeechLevelEstimator,IsConfident)107 TEST(GainController2SpeechLevelEstimator, IsConfident) {
108 TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
109 RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
110 level_estimator.level_rms_dbfs,
111 level_estimator.level_peak_dbfs, kMaxSpeechProbability,
112 *level_estimator.estimator);
113 EXPECT_TRUE(level_estimator.estimator->IsConfident());
114 }
115
116 // Checks that the estimated level is not affected by the level of non-speech
117 // frames.
TEST(GainController2SpeechLevelEstimator,EstimatorIgnoresNonSpeechFrames)118 TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
119 TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
120 // Simulate speech.
121 RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
122 level_estimator.level_rms_dbfs,
123 level_estimator.level_peak_dbfs, kMaxSpeechProbability,
124 *level_estimator.estimator);
125 const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
126 // Simulate full-scale non-speech.
127 RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
128 /*rms_dbfs=*/0.0f, /*peak_dbfs=*/0.0f,
129 kNoSpeechProbability, *level_estimator.estimator);
130 // No estimated level change is expected.
131 EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
132 estimated_level_dbfs);
133 }
134
135 // Checks the convergence speed of the estimator before it becomes confident.
TEST(GainController2SpeechLevelEstimator,ConvergenceSpeedBeforeConfidence)136 TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) {
137 TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
138 RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
139 level_estimator.level_rms_dbfs,
140 level_estimator.level_peak_dbfs, kMaxSpeechProbability,
141 *level_estimator.estimator);
142 EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
143 level_estimator.level_rms_dbfs,
144 kConvergenceSpeedTestsLevelTolerance);
145 }
146
147 // Checks the convergence speed of the estimator after it becomes confident.
TEST(GainController2SpeechLevelEstimator,ConvergenceSpeedAfterConfidence)148 TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
149 TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
150 // Reach confidence using the initial level estimate.
151 RunOnConstantLevel(
152 /*num_iterations=*/kNumFramesToConfidence,
153 /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs,
154 /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f,
155 kMaxSpeechProbability, *level_estimator.estimator);
156 // No estimate change should occur, but confidence is achieved.
157 ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
158 level_estimator.initial_speech_level_dbfs);
159 ASSERT_TRUE(level_estimator.estimator->IsConfident());
160 // After confidence.
161 constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds.
162 static_assert(
163 kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, "");
164 RunOnConstantLevel(
165 /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
166 level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs,
167 kMaxSpeechProbability, *level_estimator.estimator);
168 EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
169 level_estimator.level_rms_dbfs,
170 kConvergenceSpeedTestsLevelTolerance);
171 }
172
173 class SpeechLevelEstimatorParametrization
174 : public ::testing::TestWithParam<int> {
175 protected:
adjacent_speech_frames_threshold() const176 int adjacent_speech_frames_threshold() const { return GetParam(); }
177 };
178
// Checks that the estimate does not change when a run of speech frames is one
// frame shorter than `adjacent_speech_frames_threshold` and is then followed
// by a frame with low speech probability.
TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
  const int threshold = adjacent_speech_frames_threshold();
  TestLevelEstimator fixture(threshold);
  SpeechLevelEstimator& estimator = *fixture.estimator;
  const float initial_level = estimator.level_dbfs();
  ASSERT_LT(initial_level, fixture.level_peak_dbfs);

  // Speech run that stops one frame short of the threshold.
  const int num_speech_frames = threshold - 1;
  for (int frame = 0; frame < num_speech_frames; ++frame) {
    SCOPED_TRACE(frame);
    estimator.Update(fixture.level_rms_dbfs, fixture.level_peak_dbfs,
                     kMaxSpeechProbability);
    EXPECT_EQ(initial_level, estimator.level_dbfs());
  }

  // Frame with low speech probability right after the short speech run.
  estimator.Update(fixture.level_rms_dbfs, fixture.level_peak_dbfs,
                   kLowSpeechProbability);
  EXPECT_EQ(initial_level, estimator.level_dbfs());
}
195
// Checks that the estimate increases once `adjacent_speech_frames_threshold`
// consecutive speech frames above the initial level have been observed.
TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
  const int threshold = adjacent_speech_frames_threshold();
  TestLevelEstimator fixture(threshold);
  SpeechLevelEstimator& estimator = *fixture.estimator;
  const float initial_level = estimator.level_dbfs();
  ASSERT_LT(initial_level, fixture.level_peak_dbfs);

  // Exactly `threshold` speech frames at a constant level.
  RunOnConstantLevel(/*num_iterations=*/threshold, fixture.level_rms_dbfs,
                     fixture.level_peak_dbfs, kMaxSpeechProbability,
                     estimator);

  EXPECT_LT(initial_level, estimator.level_dbfs());
}
207
208 INSTANTIATE_TEST_SUITE_P(GainController2,
209 SpeechLevelEstimatorParametrization,
210 ::testing::Values(1, 9, 17));
211
212 } // namespace
213 } // namespace webrtc
214