xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/clipping_predictor.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/clipping_predictor.h"
12 
13 #include <algorithm>
14 #include <memory>
15 
16 #include "common_audio/include/audio_util.h"
17 #include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
18 #include "modules/audio_processing/agc2/gain_map_internal.h"
19 #include "rtc_base/checks.h"
20 #include "rtc_base/logging.h"
21 #include "rtc_base/numerics/safe_minmax.h"
22 
23 namespace webrtc {
24 namespace {
25 
// Largest gain reduction, in dB, that the adaptive clipped level step
// estimation may request from the gain map in a single update.
constexpr int kClippingPredictorMaxGainChange = 15;
27 
28 // Returns an input volume in the [`min_input_volume`, `max_input_volume`] range
29 // that reduces `gain_error_db`, which is a gain error estimated when
30 // `input_volume` was applied, according to a fixed gain map.
ComputeVolumeUpdate(int gain_error_db,int input_volume,int min_input_volume,int max_input_volume)31 int ComputeVolumeUpdate(int gain_error_db,
32                         int input_volume,
33                         int min_input_volume,
34                         int max_input_volume) {
35   RTC_DCHECK_GE(input_volume, 0);
36   RTC_DCHECK_LE(input_volume, max_input_volume);
37   if (gain_error_db == 0) {
38     return input_volume;
39   }
40   int new_volume = input_volume;
41   if (gain_error_db > 0) {
42     while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
43            new_volume < max_input_volume) {
44       ++new_volume;
45     }
46   } else {
47     while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
48            new_volume > min_input_volume) {
49       --new_volume;
50     }
51   }
52   return new_volume;
53 }
54 
ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level & level)55 float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
56   const float crest_factor =
57       FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
58   return crest_factor;
59 }
60 
61 // Crest factor-based clipping prediction and clipped level step estimation.
62 class ClippingEventPredictor : public ClippingPredictor {
63  public:
64   // ClippingEventPredictor with `num_channels` channels (limited to values
65   // higher than zero); window size `window_length` and reference window size
66   // `reference_window_length` (both referring to the number of frames in the
67   // respective sliding windows and limited to values higher than zero);
68   // reference window delay `reference_window_delay` (delay in frames, limited
69   // to values zero and higher with an additional requirement of
70   // `window_length` < `reference_window_length` + reference_window_delay`);
71   // and an estimation peak threshold `clipping_threshold` and a crest factor
72   // drop threshold `crest_factor_margin` (both in dB).
ClippingEventPredictor(int num_channels,int window_length,int reference_window_length,int reference_window_delay,float clipping_threshold,float crest_factor_margin)73   ClippingEventPredictor(int num_channels,
74                          int window_length,
75                          int reference_window_length,
76                          int reference_window_delay,
77                          float clipping_threshold,
78                          float crest_factor_margin)
79       : window_length_(window_length),
80         reference_window_length_(reference_window_length),
81         reference_window_delay_(reference_window_delay),
82         clipping_threshold_(clipping_threshold),
83         crest_factor_margin_(crest_factor_margin) {
84     RTC_DCHECK_GT(num_channels, 0);
85     RTC_DCHECK_GT(window_length, 0);
86     RTC_DCHECK_GT(reference_window_length, 0);
87     RTC_DCHECK_GE(reference_window_delay, 0);
88     RTC_DCHECK_GT(reference_window_length + reference_window_delay,
89                   window_length);
90     const int buffer_length = GetMinFramesProcessed();
91     RTC_DCHECK_GT(buffer_length, 0);
92     for (int i = 0; i < num_channels; ++i) {
93       ch_buffers_.push_back(
94           std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
95     }
96   }
97 
98   ClippingEventPredictor(const ClippingEventPredictor&) = delete;
99   ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
~ClippingEventPredictor()100   ~ClippingEventPredictor() {}
101 
Reset()102   void Reset() {
103     const int num_channels = ch_buffers_.size();
104     for (int i = 0; i < num_channels; ++i) {
105       ch_buffers_[i]->Reset();
106     }
107   }
108 
109   // Analyzes a frame of audio and stores the framewise metrics in
110   // `ch_buffers_`.
Analyze(const AudioFrameView<const float> & frame)111   void Analyze(const AudioFrameView<const float>& frame) {
112     const int num_channels = frame.num_channels();
113     RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
114     const int samples_per_channel = frame.samples_per_channel();
115     RTC_DCHECK_GT(samples_per_channel, 0);
116     for (int channel = 0; channel < num_channels; ++channel) {
117       float sum_squares = 0.0f;
118       float peak = 0.0f;
119       for (const auto& sample : frame.channel(channel)) {
120         sum_squares += sample * sample;
121         peak = std::max(std::fabs(sample), peak);
122       }
123       ch_buffers_[channel]->Push(
124           {sum_squares / static_cast<float>(samples_per_channel), peak});
125     }
126   }
127 
128   // Estimates the analog gain adjustment for channel `channel` using a
129   // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
130   // estimate for the clipped level step equal to `default_clipped_level_step_`
131   // if at least `GetMinFramesProcessed()` frames have been processed since the
132   // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
133   // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
EstimateClippedLevelStep(int channel,int level,int default_step,int min_mic_level,int max_mic_level) const134   absl::optional<int> EstimateClippedLevelStep(int channel,
135                                                int level,
136                                                int default_step,
137                                                int min_mic_level,
138                                                int max_mic_level) const {
139     RTC_CHECK_GE(channel, 0);
140     RTC_CHECK_LT(channel, ch_buffers_.size());
141     RTC_DCHECK_GE(level, 0);
142     RTC_DCHECK_LE(level, 255);
143     RTC_DCHECK_GT(default_step, 0);
144     RTC_DCHECK_LE(default_step, 255);
145     RTC_DCHECK_GE(min_mic_level, 0);
146     RTC_DCHECK_LE(min_mic_level, 255);
147     RTC_DCHECK_GE(max_mic_level, 0);
148     RTC_DCHECK_LE(max_mic_level, 255);
149     if (level <= min_mic_level) {
150       return absl::nullopt;
151     }
152     if (PredictClippingEvent(channel)) {
153       const int new_level =
154           rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
155       const int step = level - new_level;
156       if (step > 0) {
157         return step;
158       }
159     }
160     return absl::nullopt;
161   }
162 
163  private:
GetMinFramesProcessed() const164   int GetMinFramesProcessed() const {
165     return reference_window_delay_ + reference_window_length_;
166   }
167 
168   // Predicts clipping events based on the processed audio frames. Returns
169   // true if a clipping event is likely.
PredictClippingEvent(int channel) const170   bool PredictClippingEvent(int channel) const {
171     const auto metrics =
172         ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
173     if (!metrics.has_value() ||
174         !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
175       return false;
176     }
177     const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
178         reference_window_delay_, reference_window_length_);
179     if (!reference_metrics.has_value()) {
180       return false;
181     }
182     const float crest_factor = ComputeCrestFactor(metrics.value());
183     const float reference_crest_factor =
184         ComputeCrestFactor(reference_metrics.value());
185     if (crest_factor < reference_crest_factor - crest_factor_margin_) {
186       return true;
187     }
188     return false;
189   }
190 
191   std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
192   const int window_length_;
193   const int reference_window_length_;
194   const int reference_window_delay_;
195   const float clipping_threshold_;
196   const float crest_factor_margin_;
197 };
198 
199 // Performs crest factor-based clipping peak prediction.
200 class ClippingPeakPredictor : public ClippingPredictor {
201  public:
202   // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
203   // higher than zero); window size `window_length` and reference window size
204   // `reference_window_length` (both referring to the number of frames in the
205   // respective sliding windows and limited to values higher than zero);
206   // reference window delay `reference_window_delay` (delay in frames, limited
207   // to values zero and higher with an additional requirement of
208   // `window_length` < `reference_window_length` + reference_window_delay`);
209   // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
210   // clipped level step estimation is used if `adaptive_step_estimation` is
211   // true.
ClippingPeakPredictor(int num_channels,int window_length,int reference_window_length,int reference_window_delay,int clipping_threshold,bool adaptive_step_estimation)212   explicit ClippingPeakPredictor(int num_channels,
213                                  int window_length,
214                                  int reference_window_length,
215                                  int reference_window_delay,
216                                  int clipping_threshold,
217                                  bool adaptive_step_estimation)
218       : window_length_(window_length),
219         reference_window_length_(reference_window_length),
220         reference_window_delay_(reference_window_delay),
221         clipping_threshold_(clipping_threshold),
222         adaptive_step_estimation_(adaptive_step_estimation) {
223     RTC_DCHECK_GT(num_channels, 0);
224     RTC_DCHECK_GT(window_length, 0);
225     RTC_DCHECK_GT(reference_window_length, 0);
226     RTC_DCHECK_GE(reference_window_delay, 0);
227     RTC_DCHECK_GT(reference_window_length + reference_window_delay,
228                   window_length);
229     const int buffer_length = GetMinFramesProcessed();
230     RTC_DCHECK_GT(buffer_length, 0);
231     for (int i = 0; i < num_channels; ++i) {
232       ch_buffers_.push_back(
233           std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
234     }
235   }
236 
237   ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
238   ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
~ClippingPeakPredictor()239   ~ClippingPeakPredictor() {}
240 
Reset()241   void Reset() {
242     const int num_channels = ch_buffers_.size();
243     for (int i = 0; i < num_channels; ++i) {
244       ch_buffers_[i]->Reset();
245     }
246   }
247 
248   // Analyzes a frame of audio and stores the framewise metrics in
249   // `ch_buffers_`.
Analyze(const AudioFrameView<const float> & frame)250   void Analyze(const AudioFrameView<const float>& frame) {
251     const int num_channels = frame.num_channels();
252     RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
253     const int samples_per_channel = frame.samples_per_channel();
254     RTC_DCHECK_GT(samples_per_channel, 0);
255     for (int channel = 0; channel < num_channels; ++channel) {
256       float sum_squares = 0.0f;
257       float peak = 0.0f;
258       for (const auto& sample : frame.channel(channel)) {
259         sum_squares += sample * sample;
260         peak = std::max(std::fabs(sample), peak);
261       }
262       ch_buffers_[channel]->Push(
263           {sum_squares / static_cast<float>(samples_per_channel), peak});
264     }
265   }
266 
267   // Estimates the analog gain adjustment for channel `channel` using a
268   // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
269   // estimate for the clipped level step (equal to
270   // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
271   // least `GetMinFramesProcessed()` frames have been processed since the last
272   // reset and a clipping event is predicted. `level`, `min_mic_level`, and
273   // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
EstimateClippedLevelStep(int channel,int level,int default_step,int min_mic_level,int max_mic_level) const274   absl::optional<int> EstimateClippedLevelStep(int channel,
275                                                int level,
276                                                int default_step,
277                                                int min_mic_level,
278                                                int max_mic_level) const {
279     RTC_DCHECK_GE(channel, 0);
280     RTC_DCHECK_LT(channel, ch_buffers_.size());
281     RTC_DCHECK_GE(level, 0);
282     RTC_DCHECK_LE(level, 255);
283     RTC_DCHECK_GT(default_step, 0);
284     RTC_DCHECK_LE(default_step, 255);
285     RTC_DCHECK_GE(min_mic_level, 0);
286     RTC_DCHECK_LE(min_mic_level, 255);
287     RTC_DCHECK_GE(max_mic_level, 0);
288     RTC_DCHECK_LE(max_mic_level, 255);
289     if (level <= min_mic_level) {
290       return absl::nullopt;
291     }
292     absl::optional<float> estimate_db = EstimatePeakValue(channel);
293     if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
294       int step = 0;
295       if (!adaptive_step_estimation_) {
296         step = default_step;
297       } else {
298         const int estimated_gain_change =
299             rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
300                            -kClippingPredictorMaxGainChange, 0);
301         step =
302             std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
303                                                  min_mic_level, max_mic_level),
304                      default_step);
305       }
306       const int new_level =
307           rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
308       if (level > new_level) {
309         return level - new_level;
310       }
311     }
312     return absl::nullopt;
313   }
314 
315  private:
GetMinFramesProcessed()316   int GetMinFramesProcessed() {
317     return reference_window_delay_ + reference_window_length_;
318   }
319 
320   // Predicts clipping sample peaks based on the processed audio frames.
321   // Returns the estimated peak value if clipping is predicted. Otherwise
322   // returns absl::nullopt.
EstimatePeakValue(int channel) const323   absl::optional<float> EstimatePeakValue(int channel) const {
324     const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
325         reference_window_delay_, reference_window_length_);
326     if (!reference_metrics.has_value()) {
327       return absl::nullopt;
328     }
329     const auto metrics =
330         ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
331     if (!metrics.has_value() ||
332         !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
333       return absl::nullopt;
334     }
335     const float reference_crest_factor =
336         ComputeCrestFactor(reference_metrics.value());
337     const float& mean_squares = metrics.value().average;
338     const float projected_peak =
339         reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
340     return projected_peak;
341   }
342 
343   std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
344   const int window_length_;
345   const int reference_window_length_;
346   const int reference_window_delay_;
347   const int clipping_threshold_;
348   const bool adaptive_step_estimation_;
349 };
350 
351 }  // namespace
352 
CreateClippingPredictor(int num_channels,const AudioProcessing::Config::GainController1::AnalogGainController::ClippingPredictor & config)353 std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
354     int num_channels,
355     const AudioProcessing::Config::GainController1::AnalogGainController::
356         ClippingPredictor& config) {
357   if (!config.enabled) {
358     RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
359     return nullptr;
360   }
361   RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
362   using ClippingPredictorMode = AudioProcessing::Config::GainController1::
363       AnalogGainController::ClippingPredictor::Mode;
364   switch (config.mode) {
365     case ClippingPredictorMode::kClippingEventPrediction:
366       return std::make_unique<ClippingEventPredictor>(
367           num_channels, config.window_length, config.reference_window_length,
368           config.reference_window_delay, config.clipping_threshold,
369           config.crest_factor_margin);
370     case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
371       return std::make_unique<ClippingPeakPredictor>(
372           num_channels, config.window_length, config.reference_window_length,
373           config.reference_window_delay, config.clipping_threshold,
374           /*adaptive_step_estimation=*/true);
375     case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
376       return std::make_unique<ClippingPeakPredictor>(
377           num_channels, config.window_length, config.reference_window_length,
378           config.reference_window_delay, config.clipping_threshold,
379           /*adaptive_step_estimation=*/false);
380   }
381   RTC_DCHECK_NOTREACHED();
382 }
383 
384 }  // namespace webrtc
385