1 /*
2 * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/clipping_predictor.h"
12
#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>
15
16 #include "common_audio/include/audio_util.h"
17 #include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
18 #include "modules/audio_processing/agc2/gain_map_internal.h"
19 #include "rtc_base/checks.h"
20 #include "rtc_base/logging.h"
21 #include "rtc_base/numerics/safe_minmax.h"
22
23 namespace webrtc {
24 namespace {
25
// Largest gain reduction magnitude (used as a dB gain error) that the adaptive
// step estimation may request from the gain map in a single update.
constexpr int kClippingPredictorMaxGainChange{15};
27
28 // Returns an input volume in the [`min_input_volume`, `max_input_volume`] range
29 // that reduces `gain_error_db`, which is a gain error estimated when
30 // `input_volume` was applied, according to a fixed gain map.
ComputeVolumeUpdate(int gain_error_db,int input_volume,int min_input_volume,int max_input_volume)31 int ComputeVolumeUpdate(int gain_error_db,
32 int input_volume,
33 int min_input_volume,
34 int max_input_volume) {
35 RTC_DCHECK_GE(input_volume, 0);
36 RTC_DCHECK_LE(input_volume, max_input_volume);
37 if (gain_error_db == 0) {
38 return input_volume;
39 }
40 int new_volume = input_volume;
41 if (gain_error_db > 0) {
42 while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
43 new_volume < max_input_volume) {
44 ++new_volume;
45 }
46 } else {
47 while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
48 new_volume > min_input_volume) {
49 --new_volume;
50 }
51 }
52 return new_volume;
53 }
54
ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level & level)55 float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
56 const float crest_factor =
57 FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
58 return crest_factor;
59 }
60
61 // Crest factor-based clipping prediction and clipped level step estimation.
62 class ClippingEventPredictor : public ClippingPredictor {
63 public:
64 // ClippingEventPredictor with `num_channels` channels (limited to values
65 // higher than zero); window size `window_length` and reference window size
66 // `reference_window_length` (both referring to the number of frames in the
67 // respective sliding windows and limited to values higher than zero);
68 // reference window delay `reference_window_delay` (delay in frames, limited
69 // to values zero and higher with an additional requirement of
70 // `window_length` < `reference_window_length` + reference_window_delay`);
71 // and an estimation peak threshold `clipping_threshold` and a crest factor
72 // drop threshold `crest_factor_margin` (both in dB).
ClippingEventPredictor(int num_channels,int window_length,int reference_window_length,int reference_window_delay,float clipping_threshold,float crest_factor_margin)73 ClippingEventPredictor(int num_channels,
74 int window_length,
75 int reference_window_length,
76 int reference_window_delay,
77 float clipping_threshold,
78 float crest_factor_margin)
79 : window_length_(window_length),
80 reference_window_length_(reference_window_length),
81 reference_window_delay_(reference_window_delay),
82 clipping_threshold_(clipping_threshold),
83 crest_factor_margin_(crest_factor_margin) {
84 RTC_DCHECK_GT(num_channels, 0);
85 RTC_DCHECK_GT(window_length, 0);
86 RTC_DCHECK_GT(reference_window_length, 0);
87 RTC_DCHECK_GE(reference_window_delay, 0);
88 RTC_DCHECK_GT(reference_window_length + reference_window_delay,
89 window_length);
90 const int buffer_length = GetMinFramesProcessed();
91 RTC_DCHECK_GT(buffer_length, 0);
92 for (int i = 0; i < num_channels; ++i) {
93 ch_buffers_.push_back(
94 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
95 }
96 }
97
98 ClippingEventPredictor(const ClippingEventPredictor&) = delete;
99 ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
~ClippingEventPredictor()100 ~ClippingEventPredictor() {}
101
Reset()102 void Reset() {
103 const int num_channels = ch_buffers_.size();
104 for (int i = 0; i < num_channels; ++i) {
105 ch_buffers_[i]->Reset();
106 }
107 }
108
109 // Analyzes a frame of audio and stores the framewise metrics in
110 // `ch_buffers_`.
Analyze(const AudioFrameView<const float> & frame)111 void Analyze(const AudioFrameView<const float>& frame) {
112 const int num_channels = frame.num_channels();
113 RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
114 const int samples_per_channel = frame.samples_per_channel();
115 RTC_DCHECK_GT(samples_per_channel, 0);
116 for (int channel = 0; channel < num_channels; ++channel) {
117 float sum_squares = 0.0f;
118 float peak = 0.0f;
119 for (const auto& sample : frame.channel(channel)) {
120 sum_squares += sample * sample;
121 peak = std::max(std::fabs(sample), peak);
122 }
123 ch_buffers_[channel]->Push(
124 {sum_squares / static_cast<float>(samples_per_channel), peak});
125 }
126 }
127
128 // Estimates the analog gain adjustment for channel `channel` using a
129 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
130 // estimate for the clipped level step equal to `default_clipped_level_step_`
131 // if at least `GetMinFramesProcessed()` frames have been processed since the
132 // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
133 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
EstimateClippedLevelStep(int channel,int level,int default_step,int min_mic_level,int max_mic_level) const134 absl::optional<int> EstimateClippedLevelStep(int channel,
135 int level,
136 int default_step,
137 int min_mic_level,
138 int max_mic_level) const {
139 RTC_CHECK_GE(channel, 0);
140 RTC_CHECK_LT(channel, ch_buffers_.size());
141 RTC_DCHECK_GE(level, 0);
142 RTC_DCHECK_LE(level, 255);
143 RTC_DCHECK_GT(default_step, 0);
144 RTC_DCHECK_LE(default_step, 255);
145 RTC_DCHECK_GE(min_mic_level, 0);
146 RTC_DCHECK_LE(min_mic_level, 255);
147 RTC_DCHECK_GE(max_mic_level, 0);
148 RTC_DCHECK_LE(max_mic_level, 255);
149 if (level <= min_mic_level) {
150 return absl::nullopt;
151 }
152 if (PredictClippingEvent(channel)) {
153 const int new_level =
154 rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
155 const int step = level - new_level;
156 if (step > 0) {
157 return step;
158 }
159 }
160 return absl::nullopt;
161 }
162
163 private:
GetMinFramesProcessed() const164 int GetMinFramesProcessed() const {
165 return reference_window_delay_ + reference_window_length_;
166 }
167
168 // Predicts clipping events based on the processed audio frames. Returns
169 // true if a clipping event is likely.
PredictClippingEvent(int channel) const170 bool PredictClippingEvent(int channel) const {
171 const auto metrics =
172 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
173 if (!metrics.has_value() ||
174 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
175 return false;
176 }
177 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
178 reference_window_delay_, reference_window_length_);
179 if (!reference_metrics.has_value()) {
180 return false;
181 }
182 const float crest_factor = ComputeCrestFactor(metrics.value());
183 const float reference_crest_factor =
184 ComputeCrestFactor(reference_metrics.value());
185 if (crest_factor < reference_crest_factor - crest_factor_margin_) {
186 return true;
187 }
188 return false;
189 }
190
191 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
192 const int window_length_;
193 const int reference_window_length_;
194 const int reference_window_delay_;
195 const float clipping_threshold_;
196 const float crest_factor_margin_;
197 };
198
199 // Performs crest factor-based clipping peak prediction.
200 class ClippingPeakPredictor : public ClippingPredictor {
201 public:
202 // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
203 // higher than zero); window size `window_length` and reference window size
204 // `reference_window_length` (both referring to the number of frames in the
205 // respective sliding windows and limited to values higher than zero);
206 // reference window delay `reference_window_delay` (delay in frames, limited
207 // to values zero and higher with an additional requirement of
208 // `window_length` < `reference_window_length` + reference_window_delay`);
209 // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
210 // clipped level step estimation is used if `adaptive_step_estimation` is
211 // true.
ClippingPeakPredictor(int num_channels,int window_length,int reference_window_length,int reference_window_delay,int clipping_threshold,bool adaptive_step_estimation)212 explicit ClippingPeakPredictor(int num_channels,
213 int window_length,
214 int reference_window_length,
215 int reference_window_delay,
216 int clipping_threshold,
217 bool adaptive_step_estimation)
218 : window_length_(window_length),
219 reference_window_length_(reference_window_length),
220 reference_window_delay_(reference_window_delay),
221 clipping_threshold_(clipping_threshold),
222 adaptive_step_estimation_(adaptive_step_estimation) {
223 RTC_DCHECK_GT(num_channels, 0);
224 RTC_DCHECK_GT(window_length, 0);
225 RTC_DCHECK_GT(reference_window_length, 0);
226 RTC_DCHECK_GE(reference_window_delay, 0);
227 RTC_DCHECK_GT(reference_window_length + reference_window_delay,
228 window_length);
229 const int buffer_length = GetMinFramesProcessed();
230 RTC_DCHECK_GT(buffer_length, 0);
231 for (int i = 0; i < num_channels; ++i) {
232 ch_buffers_.push_back(
233 std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
234 }
235 }
236
237 ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
238 ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
~ClippingPeakPredictor()239 ~ClippingPeakPredictor() {}
240
Reset()241 void Reset() {
242 const int num_channels = ch_buffers_.size();
243 for (int i = 0; i < num_channels; ++i) {
244 ch_buffers_[i]->Reset();
245 }
246 }
247
248 // Analyzes a frame of audio and stores the framewise metrics in
249 // `ch_buffers_`.
Analyze(const AudioFrameView<const float> & frame)250 void Analyze(const AudioFrameView<const float>& frame) {
251 const int num_channels = frame.num_channels();
252 RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
253 const int samples_per_channel = frame.samples_per_channel();
254 RTC_DCHECK_GT(samples_per_channel, 0);
255 for (int channel = 0; channel < num_channels; ++channel) {
256 float sum_squares = 0.0f;
257 float peak = 0.0f;
258 for (const auto& sample : frame.channel(channel)) {
259 sum_squares += sample * sample;
260 peak = std::max(std::fabs(sample), peak);
261 }
262 ch_buffers_[channel]->Push(
263 {sum_squares / static_cast<float>(samples_per_channel), peak});
264 }
265 }
266
267 // Estimates the analog gain adjustment for channel `channel` using a
268 // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
269 // estimate for the clipped level step (equal to
270 // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
271 // least `GetMinFramesProcessed()` frames have been processed since the last
272 // reset and a clipping event is predicted. `level`, `min_mic_level`, and
273 // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
EstimateClippedLevelStep(int channel,int level,int default_step,int min_mic_level,int max_mic_level) const274 absl::optional<int> EstimateClippedLevelStep(int channel,
275 int level,
276 int default_step,
277 int min_mic_level,
278 int max_mic_level) const {
279 RTC_DCHECK_GE(channel, 0);
280 RTC_DCHECK_LT(channel, ch_buffers_.size());
281 RTC_DCHECK_GE(level, 0);
282 RTC_DCHECK_LE(level, 255);
283 RTC_DCHECK_GT(default_step, 0);
284 RTC_DCHECK_LE(default_step, 255);
285 RTC_DCHECK_GE(min_mic_level, 0);
286 RTC_DCHECK_LE(min_mic_level, 255);
287 RTC_DCHECK_GE(max_mic_level, 0);
288 RTC_DCHECK_LE(max_mic_level, 255);
289 if (level <= min_mic_level) {
290 return absl::nullopt;
291 }
292 absl::optional<float> estimate_db = EstimatePeakValue(channel);
293 if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
294 int step = 0;
295 if (!adaptive_step_estimation_) {
296 step = default_step;
297 } else {
298 const int estimated_gain_change =
299 rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
300 -kClippingPredictorMaxGainChange, 0);
301 step =
302 std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
303 min_mic_level, max_mic_level),
304 default_step);
305 }
306 const int new_level =
307 rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
308 if (level > new_level) {
309 return level - new_level;
310 }
311 }
312 return absl::nullopt;
313 }
314
315 private:
GetMinFramesProcessed()316 int GetMinFramesProcessed() {
317 return reference_window_delay_ + reference_window_length_;
318 }
319
320 // Predicts clipping sample peaks based on the processed audio frames.
321 // Returns the estimated peak value if clipping is predicted. Otherwise
322 // returns absl::nullopt.
EstimatePeakValue(int channel) const323 absl::optional<float> EstimatePeakValue(int channel) const {
324 const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
325 reference_window_delay_, reference_window_length_);
326 if (!reference_metrics.has_value()) {
327 return absl::nullopt;
328 }
329 const auto metrics =
330 ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
331 if (!metrics.has_value() ||
332 !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
333 return absl::nullopt;
334 }
335 const float reference_crest_factor =
336 ComputeCrestFactor(reference_metrics.value());
337 const float& mean_squares = metrics.value().average;
338 const float projected_peak =
339 reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
340 return projected_peak;
341 }
342
343 std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
344 const int window_length_;
345 const int reference_window_length_;
346 const int reference_window_delay_;
347 const int clipping_threshold_;
348 const bool adaptive_step_estimation_;
349 };
350
351 } // namespace
352
CreateClippingPredictor(int num_channels,const AudioProcessing::Config::GainController1::AnalogGainController::ClippingPredictor & config)353 std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
354 int num_channels,
355 const AudioProcessing::Config::GainController1::AnalogGainController::
356 ClippingPredictor& config) {
357 if (!config.enabled) {
358 RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
359 return nullptr;
360 }
361 RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
362 using ClippingPredictorMode = AudioProcessing::Config::GainController1::
363 AnalogGainController::ClippingPredictor::Mode;
364 switch (config.mode) {
365 case ClippingPredictorMode::kClippingEventPrediction:
366 return std::make_unique<ClippingEventPredictor>(
367 num_channels, config.window_length, config.reference_window_length,
368 config.reference_window_delay, config.clipping_threshold,
369 config.crest_factor_margin);
370 case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
371 return std::make_unique<ClippingPeakPredictor>(
372 num_channels, config.window_length, config.reference_window_length,
373 config.reference_window_delay, config.clipping_threshold,
374 /*adaptive_step_estimation=*/true);
375 case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
376 return std::make_unique<ClippingPeakPredictor>(
377 num_channels, config.window_length, config.reference_window_length,
378 config.reference_window_delay, config.clipping_threshold,
379 /*adaptive_step_estimation=*/false);
380 }
381 RTC_DCHECK_NOTREACHED();
382 }
383
384 } // namespace webrtc
385