xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/speech_level_estimator.h"
12 
13 #include <memory>
14 
15 #include "modules/audio_processing/agc2/agc2_common.h"
16 #include "modules/audio_processing/include/audio_processing.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/gunit.h"
19 
20 namespace webrtc {
21 namespace {
22 
23 using AdaptiveDigitalConfig =
24     AudioProcessing::Config::GainController2::AdaptiveDigital;
25 
26 // Number of speech frames that the level estimator must observe in order to
27 // become confident about the estimated level.
28 constexpr int kNumFramesToConfidence =
29     kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
30 static_assert(kNumFramesToConfidence > 0, "");
31 
32 constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
33 
34 // Provides the `vad_level` value `num_iterations` times to `level_estimator`.
RunOnConstantLevel(int num_iterations,float rms_dbfs,float peak_dbfs,float speech_probability,SpeechLevelEstimator & level_estimator)35 void RunOnConstantLevel(int num_iterations,
36                         float rms_dbfs,
37                         float peak_dbfs,
38                         float speech_probability,
39                         SpeechLevelEstimator& level_estimator) {
40   for (int i = 0; i < num_iterations; ++i) {
41     level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
42   }
43 }
44 
GetAdaptiveDigitalConfig(int adjacent_speech_frames_threshold)45 constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
46     int adjacent_speech_frames_threshold) {
47   AdaptiveDigitalConfig config;
48   config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold;
49   return config;
50 }
51 
52 constexpr float kNoSpeechProbability = 0.0f;
53 constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
54 constexpr float kMaxSpeechProbability = 1.0f;
55 
56 // Level estimator with data dumper.
57 struct TestLevelEstimator {
TestLevelEstimatorwebrtc::__anonddce31470111::TestLevelEstimator58   explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
59       : data_dumper(0),
60         estimator(std::make_unique<SpeechLevelEstimator>(
61             &data_dumper,
62             GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
63         initial_speech_level_dbfs(estimator->level_dbfs()),
64         level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
65         level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {
66     RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs);
67     RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs);
68     RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f)
69         << "Adjust `level_rms_dbfs` so that the difference from the initial "
70            "level is wide enough for the tests";
71   }
72   ApmDataDumper data_dumper;
73   std::unique_ptr<SpeechLevelEstimator> estimator;
74   const float initial_speech_level_dbfs;
75   const float level_rms_dbfs;
76   const float level_peak_dbfs;
77 };
78 
79 // Checks that the level estimator converges to a constant input speech level.
TEST(GainController2SpeechLevelEstimator,LevelStabilizes)80 TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
81   TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
82   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
83                      level_estimator.level_rms_dbfs,
84                      level_estimator.level_peak_dbfs, kMaxSpeechProbability,
85                      *level_estimator.estimator);
86   const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
87   RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs,
88                      level_estimator.level_peak_dbfs, kMaxSpeechProbability,
89                      *level_estimator.estimator);
90   EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
91               0.1f);
92 }
93 
94 // Checks that the level controller does not become confident when too few
95 // speech frames are observed.
TEST(GainController2SpeechLevelEstimator,IsNotConfident)96 TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
97   TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
98   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
99                      level_estimator.level_rms_dbfs,
100                      level_estimator.level_peak_dbfs, kMaxSpeechProbability,
101                      *level_estimator.estimator);
102   EXPECT_FALSE(level_estimator.estimator->IsConfident());
103 }
104 
105 // Checks that the level controller becomes confident when enough speech frames
106 // are observed.
TEST(GainController2SpeechLevelEstimator,IsConfident)107 TEST(GainController2SpeechLevelEstimator, IsConfident) {
108   TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
109   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
110                      level_estimator.level_rms_dbfs,
111                      level_estimator.level_peak_dbfs, kMaxSpeechProbability,
112                      *level_estimator.estimator);
113   EXPECT_TRUE(level_estimator.estimator->IsConfident());
114 }
115 
116 // Checks that the estimated level is not affected by the level of non-speech
117 // frames.
TEST(GainController2SpeechLevelEstimator,EstimatorIgnoresNonSpeechFrames)118 TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
119   TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
120   // Simulate speech.
121   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
122                      level_estimator.level_rms_dbfs,
123                      level_estimator.level_peak_dbfs, kMaxSpeechProbability,
124                      *level_estimator.estimator);
125   const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
126   // Simulate full-scale non-speech.
127   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
128                      /*rms_dbfs=*/0.0f, /*peak_dbfs=*/0.0f,
129                      kNoSpeechProbability, *level_estimator.estimator);
130   // No estimated level change is expected.
131   EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
132                   estimated_level_dbfs);
133 }
134 
135 // Checks the convergence speed of the estimator before it becomes confident.
TEST(GainController2SpeechLevelEstimator,ConvergenceSpeedBeforeConfidence)136 TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) {
137   TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
138   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
139                      level_estimator.level_rms_dbfs,
140                      level_estimator.level_peak_dbfs, kMaxSpeechProbability,
141                      *level_estimator.estimator);
142   EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
143               level_estimator.level_rms_dbfs,
144               kConvergenceSpeedTestsLevelTolerance);
145 }
146 
147 // Checks the convergence speed of the estimator after it becomes confident.
TEST(GainController2SpeechLevelEstimator,ConvergenceSpeedAfterConfidence)148 TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
149   TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
150   // Reach confidence using the initial level estimate.
151   RunOnConstantLevel(
152       /*num_iterations=*/kNumFramesToConfidence,
153       /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs,
154       /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f,
155       kMaxSpeechProbability, *level_estimator.estimator);
156   // No estimate change should occur, but confidence is achieved.
157   ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
158                   level_estimator.initial_speech_level_dbfs);
159   ASSERT_TRUE(level_estimator.estimator->IsConfident());
160   // After confidence.
161   constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600;  // 6 seconds.
162   static_assert(
163       kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, "");
164   RunOnConstantLevel(
165       /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
166       level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs,
167       kMaxSpeechProbability, *level_estimator.estimator);
168   EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
169               level_estimator.level_rms_dbfs,
170               kConvergenceSpeedTestsLevelTolerance);
171 }
172 
173 class SpeechLevelEstimatorParametrization
174     : public ::testing::TestWithParam<int> {
175  protected:
adjacent_speech_frames_threshold() const176   int adjacent_speech_frames_threshold() const { return GetParam(); }
177 };
178 
// Checks that the estimate stays at its initial value while the number of
// adjacent speech frames is one short of the threshold, and also after a
// following frame with low speech probability.
TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
  TestLevelEstimator test_estimator(adjacent_speech_frames_threshold());
  SpeechLevelEstimator& estimator = *test_estimator.estimator;
  const float rms_dbfs = test_estimator.level_rms_dbfs;
  const float peak_dbfs = test_estimator.level_peak_dbfs;
  const float initial_level = estimator.level_dbfs();
  ASSERT_LT(initial_level, peak_dbfs);

  // One speech frame fewer than the threshold.
  const int num_speech_frames = adjacent_speech_frames_threshold() - 1;
  for (int frame = 0; frame < num_speech_frames; ++frame) {
    SCOPED_TRACE(frame);
    estimator.Update(rms_dbfs, peak_dbfs, kMaxSpeechProbability);
    EXPECT_EQ(initial_level, estimator.level_dbfs());
  }

  // End the speech segment with a low speech probability frame.
  estimator.Update(rms_dbfs, peak_dbfs, kLowSpeechProbability);
  EXPECT_EQ(initial_level, estimator.level_dbfs());
}
195 
// Checks that the estimate rises above its initial value once as many adjacent
// speech frames as the threshold are observed.
TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
  TestLevelEstimator test_estimator(adjacent_speech_frames_threshold());
  SpeechLevelEstimator& estimator = *test_estimator.estimator;
  const float initial_level = estimator.level_dbfs();
  ASSERT_LT(initial_level, test_estimator.level_peak_dbfs);

  RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold(),
                     test_estimator.level_rms_dbfs,
                     test_estimator.level_peak_dbfs, kMaxSpeechProbability,
                     estimator);

  EXPECT_LT(initial_level, estimator.level_dbfs());
}
207 
208 INSTANTIATE_TEST_SUITE_P(GainController2,
209                          SpeechLevelEstimatorParametrization,
210                          ::testing::Values(1, 9, 17));
211 
212 }  // namespace
213 }  // namespace webrtc
214