xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/input_volume_controller.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
13 
14 #include <memory>
15 #include <vector>
16 
17 #include "absl/types/optional.h"
18 #include "api/array_view.h"
19 #include "modules/audio_processing/agc2/clipping_predictor.h"
20 #include "modules/audio_processing/audio_buffer.h"
21 #include "modules/audio_processing/include/audio_processing.h"
22 #include "rtc_base/gtest_prod_util.h"
23 
24 namespace webrtc {
25 
26 class MonoInputVolumeController;
27 
28 // The input volume controller recommends what volume to use, handles volume
29 // changes and clipping detection and prediction. In particular, it handles
30 // changes triggered by the user (e.g., volume set to zero by a HW mute button).
31 // This class is not thread-safe.
32 // TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
33 // convention.
34 class InputVolumeController final {
35  public:
36   // Config for the constructor.
37   struct Config {
38     // Lowest input volume level that will be applied in response to clipping.
39     int clipped_level_min = 70;
40     // Amount input volume level is lowered with every clipping event. Limited
41     // to (0, 255].
42     int clipped_level_step = 15;
43     // Proportion of clipped samples required to declare a clipping event.
44     // Limited to (0.0f, 1.0f).
45     float clipped_ratio_threshold = 0.1f;
46     // Time in frames to wait after a clipping event before checking again.
47     // Limited to values higher than 0.
48     int clipped_wait_frames = 300;
49     // Enables clipping prediction functionality.
50     bool enable_clipping_predictor = false;
51     // Speech level target range (dBFS). If the speech level is in the range
52     // [`target_range_min_dbfs`, `target_range_max_dbfs`], no input volume
53     // adjustments are done based on the speech level. For speech levels below
54     // and above the range, the targets `target_range_min_dbfs` and
55     // `target_range_max_dbfs` are used, respectively. The example values
56     // `target_range_max_dbfs` -18 and `target_range_min_dbfs` -48 refer to a
57     // configuration where the zero-digital-gain target is -18 dBFS and the
58     // digital gain control is expected to compensate for speech level errors
59     // up to -30 dB.
60     int target_range_max_dbfs = -18;
61     int target_range_min_dbfs = -48;
62     // Number of wait frames between the recommended input volume updates.
63     int update_input_volume_wait_frames = 100;
64     // Speech probability threshold: speech probabilities below the threshold
65     // are considered silence. Limited to [0.0f, 1.0f].
66     float speech_probability_threshold = 0.7f;
67     // Minimum speech frame ratio for volume updates to be allowed. Limited to
68     // [0.0f, 1.0f].
69     float speech_ratio_threshold = 0.9f;
70   };
71 
72   // Ctor. `num_capture_channels` specifies the number of channels for the audio
73   // passed to `AnalyzePreProcess()` and `Process()`. Clamps
74   // `config.startup_min_level` in the [12, 255] range.
75   InputVolumeController(int num_capture_channels, const Config& config);
76 
77   ~InputVolumeController();
78   InputVolumeController(const InputVolumeController&) = delete;
79   InputVolumeController& operator=(const InputVolumeController&) = delete;
80 
81   // TODO(webrtc:7494): Integrate initialization into ctor and remove.
82   void Initialize();
83 
84   // Sets the applied input volume.
85   void set_stream_analog_level(int level);
86 
87   // TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and
88   // remove `set_stream_analog_level()`.
89   // Analyzes `audio` before `Process()` is called so that the analysis can be
90   // performed before digital processing operations take place (e.g., echo
91   // cancellation). The analysis consists of input clipping detection and
92   // prediction (if enabled). Must be called after `set_stream_analog_level()`.
93   void AnalyzePreProcess(const AudioBuffer& audio_buffer);
94 
95   // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
96   // Adjusts the recommended input volume upwards/downwards based on
97   // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value
98   // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and
99   // `speech_level_dbfs` in the the range [-90.f, 30.0f].
100   void Process(float speech_probability,
101                absl::optional<float> speech_level_dbfs);
102 
103   // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
104   // `recommended_analog_level()`.
105   // Returns the recommended input volume. If the input volume contoller is
106   // disabled, returns the input volume set via the latest
107   // `set_stream_analog_level()` call. Must be called after
108   // `AnalyzePreProcess()` and `Process()`.
recommended_analog_level()109   int recommended_analog_level() const { return recommended_input_volume_; }
110 
111   // Stores whether the capture output will be used or not. Call when the
112   // capture stream output has been flagged to be used/not-used. If unused, the
113   // controller disregards all incoming audio.
114   void HandleCaptureOutputUsedChange(bool capture_output_used);
115 
116   // Returns true if clipping prediction is enabled.
117   // TODO(bugs.webrtc.org/7494): Deprecate this method.
clipping_predictor_enabled()118   bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
119 
120   // Returns true if clipping prediction is used to adjust the input volume.
121   // TODO(bugs.webrtc.org/7494): Deprecate this method.
use_clipping_predictor_step()122   bool use_clipping_predictor_step() const {
123     return use_clipping_predictor_step_;
124   }
125 
126  private:
127   friend class InputVolumeControllerTestHelper;
128 
129   FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDefault);
130   FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDisabled);
131   FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
132                            MinInputVolumeOutOfRangeAbove);
133   FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
134                            MinInputVolumeOutOfRangeBelow);
135   FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeEnabled50);
136   FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
137                            ClippingParametersVerified);
138 
139   void AggregateChannelLevels();
140 
141   const int num_capture_channels_;
142 
143   // Minimum input volume that can be recommended.
144   const int min_input_volume_;
145 
146   // TODO(bugs.webrtc.org/7494): Create a separate member for the applied input
147   // volume.
148   // TODO(bugs.webrtc.org/7494): Once
149   // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a trivial
150   // getter, leave uninitialized.
151   // Recommended input volume. After `set_stream_analog_level()` is called it
152   // holds the observed input volume. Possibly updated by `AnalyzePreProcess()`
153   // and `Process()`; after these calls, holds the recommended input volume.
154   int recommended_input_volume_ = 0;
155 
156   bool capture_output_used_;
157 
158   // Clipping detection and prediction.
159   const int clipped_level_step_;
160   const float clipped_ratio_threshold_;
161   const int clipped_wait_frames_;
162   const std::unique_ptr<ClippingPredictor> clipping_predictor_;
163   const bool use_clipping_predictor_step_;
164   int frames_since_clipped_;
165   int clipping_rate_log_counter_;
166   float clipping_rate_log_;
167 
168   // Target range minimum and maximum. If the seech level is in the range
169   // [`target_range_min_dbfs`, `target_range_max_dbfs`], no volume adjustments
170   // take place. Instead, the digital gain controller is assumed to adapt to
171   // compensate for the speech level RMS error.
172   const int target_range_max_dbfs_;
173   const int target_range_min_dbfs_;
174 
175   // Channel controllers updating the gain upwards/downwards.
176   std::vector<std::unique_ptr<MonoInputVolumeController>> channel_controllers_;
177   int channel_controlling_gain_ = 0;
178 };
179 
180 // TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
181 // convention.
182 class MonoInputVolumeController {
183  public:
184   MonoInputVolumeController(int min_input_volume_after_clipping,
185                             int min_input_volume,
186                             int update_input_volume_wait_frames,
187                             float speech_probability_threshold,
188                             float speech_ratio_threshold);
189   ~MonoInputVolumeController();
190   MonoInputVolumeController(const MonoInputVolumeController&) = delete;
191   MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
192       delete;
193 
194   void Initialize();
195   void HandleCaptureOutputUsedChange(bool capture_output_used);
196 
197   // Sets the current input volume.
set_stream_analog_level(int input_volume)198   void set_stream_analog_level(int input_volume) {
199     recommended_input_volume_ = input_volume;
200   }
201 
202   // Lowers the recommended input volume in response to clipping based on the
203   // suggested reduction `clipped_level_step`. Must be called after
204   // `set_stream_analog_level()`.
205   void HandleClipping(int clipped_level_step);
206 
207   // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
208   // Adjusts the recommended input volume upwards/downwards depending on
209   // whether `rms_error_dbfs` is positive or negative. Updates are only allowed
210   // for active speech segments and when `rms_error_dbfs` is not empty. Must be
211   // called after `HandleClipping()`.
212   void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
213 
214   // Returns the recommended input volume. Must be called after `Process()`.
recommended_analog_level()215   int recommended_analog_level() const { return recommended_input_volume_; }
216 
ActivateLogging()217   void ActivateLogging() { log_to_histograms_ = true; }
218 
min_input_volume_after_clipping()219   int min_input_volume_after_clipping() const {
220     return min_input_volume_after_clipping_;
221   }
222 
223   // Only used for testing.
min_input_volume()224   int min_input_volume() const { return min_input_volume_; }
225 
226  private:
227   // Sets a new input volume, after first checking that it hasn't been updated
228   // by the user, in which case no action is taken.
229   void SetInputVolume(int new_volume);
230 
231   // Sets the maximum input volume that the input volume controller is allowed
232   // to apply. The volume must be at least `kClippedLevelMin`.
233   void SetMaxLevel(int level);
234 
235   int CheckVolumeAndReset();
236 
237   // Updates the recommended input volume. If the volume slider needs to be
238   // moved, we check first if the user has adjusted it, in which case we take no
239   // action and cache the updated level.
240   void UpdateInputVolume(int rms_error_dbfs);
241 
242   const int min_input_volume_;
243   const int min_input_volume_after_clipping_;
244   int max_input_volume_;
245 
246   // Last recommended input volume.
247   int input_volume_ = 0;
248 
249   bool capture_output_used_ = true;
250   bool check_volume_on_next_process_ = true;
251   bool startup_ = true;
252 
253   // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
254   // input volume.
255   // Recommended input volume. After `set_stream_analog_level()` is
256   // called, it holds the observed applied input volume. Possibly updated by
257   // `HandleClipping()` and `Process()`; after these calls, holds the
258   // recommended input volume.
259   int recommended_input_volume_ = 0;
260 
261   bool log_to_histograms_ = false;
262 
263   // Counters for frames and speech frames since the last update in the
264   // recommended input volume.
265   const int update_input_volume_wait_frames_;
266   int frames_since_update_input_volume_ = 0;
267   int speech_frames_since_update_input_volume_ = 0;
268   bool is_first_frame_ = true;
269 
270   // Speech probability threshold for a frame to be considered speech (instead
271   // of silence). Limited to [0.0f, 1.0f].
272   const float speech_probability_threshold_;
273   // Minimum ratio of speech frames. Limited to [0.0f, 1.0f].
274   const float speech_ratio_threshold_;
275 };
276 
277 }  // namespace webrtc
278 
279 #endif  // MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
280