xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/saturation_protector.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/saturation_protector.h"
12 
13 #include <memory>
14 
15 #include "modules/audio_processing/agc2/agc2_common.h"
16 #include "modules/audio_processing/agc2/saturation_protector_buffer.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/numerics/safe_minmax.h"
20 
21 namespace webrtc {
22 namespace {
23 
// Accumulated frame time, in ms, that must be exceeded before the running max
// peak is pushed into the peak delay buffer.
constexpr int kPeakEnveloperSuperFrameLengthMs = 400;

// Lower and upper bounds applied to the headroom after every update.
constexpr float kMinMarginDb = 12.0f;
constexpr float kMaxMarginDb = 25.0f;

// Smoothing coefficients for the headroom update: `kAttack` is used when the
// observed peak-to-speech-level difference is above the current headroom,
// `kDecay` otherwise.
constexpr float kAttack = 0.9988493699365052f;
constexpr float kDecay = 0.9997697679981565f;
29 
30 // Saturation protector state. Defined outside of `SaturationProtectorImpl` to
31 // implement check-point and restore ops.
32 struct SaturationProtectorState {
operator ==webrtc::__anone0dcfa6d0111::SaturationProtectorState33   bool operator==(const SaturationProtectorState& s) const {
34     return headroom_db == s.headroom_db &&
35            peak_delay_buffer == s.peak_delay_buffer &&
36            max_peaks_dbfs == s.max_peaks_dbfs &&
37            time_since_push_ms == s.time_since_push_ms;
38   }
operator !=webrtc::__anone0dcfa6d0111::SaturationProtectorState39   inline bool operator!=(const SaturationProtectorState& s) const {
40     return !(*this == s);
41   }
42 
43   float headroom_db;
44   SaturationProtectorBuffer peak_delay_buffer;
45   float max_peaks_dbfs;
46   int time_since_push_ms;  // Time since the last ring buffer push operation.
47 };
48 
49 // Resets the saturation protector state.
ResetSaturationProtectorState(float initial_headroom_db,SaturationProtectorState & state)50 void ResetSaturationProtectorState(float initial_headroom_db,
51                                    SaturationProtectorState& state) {
52   state.headroom_db = initial_headroom_db;
53   state.peak_delay_buffer.Reset();
54   state.max_peaks_dbfs = kMinLevelDbfs;
55   state.time_since_push_ms = 0;
56 }
57 
58 // Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
59 // and the peak level `peak_dbfs` for an observed frame. `state` must not be
60 // modified without calling this function.
UpdateSaturationProtectorState(float peak_dbfs,float speech_level_dbfs,SaturationProtectorState & state)61 void UpdateSaturationProtectorState(float peak_dbfs,
62                                     float speech_level_dbfs,
63                                     SaturationProtectorState& state) {
64   // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
65   state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
66   state.time_since_push_ms += kFrameDurationMs;
67   if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
68     // Push `max_peaks_dbfs` back into the ring buffer.
69     state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
70     // Reset.
71     state.max_peaks_dbfs = kMinLevelDbfs;
72     state.time_since_push_ms = 0;
73   }
74 
75   // Update the headroom by comparing the estimated speech level and the delayed
76   // max speech peak.
77   const float delayed_peak_dbfs =
78       state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
79   const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
80   if (difference_db > state.headroom_db) {
81     // Attack.
82     state.headroom_db =
83         state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
84   } else {
85     // Decay.
86     state.headroom_db =
87         state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
88   }
89 
90   state.headroom_db =
91       rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
92 }
93 
94 // Saturation protector which recommends a headroom based on the recent peaks.
95 class SaturationProtectorImpl : public SaturationProtector {
96  public:
SaturationProtectorImpl(float initial_headroom_db,int adjacent_speech_frames_threshold,ApmDataDumper * apm_data_dumper)97   explicit SaturationProtectorImpl(float initial_headroom_db,
98                                    int adjacent_speech_frames_threshold,
99                                    ApmDataDumper* apm_data_dumper)
100       : apm_data_dumper_(apm_data_dumper),
101         initial_headroom_db_(initial_headroom_db),
102         adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
103     Reset();
104   }
105   SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
106   SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
107   ~SaturationProtectorImpl() = default;
108 
HeadroomDb()109   float HeadroomDb() override { return headroom_db_; }
110 
Analyze(float speech_probability,float peak_dbfs,float speech_level_dbfs)111   void Analyze(float speech_probability,
112                float peak_dbfs,
113                float speech_level_dbfs) override {
114     if (speech_probability < kVadConfidenceThreshold) {
115       // Not a speech frame.
116       if (adjacent_speech_frames_threshold_ > 1) {
117         // When two or more adjacent speech frames are required in order to
118         // update the state, we need to decide whether to discard or confirm the
119         // updates based on the speech sequence length.
120         if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
121           // First non-speech frame after a long enough sequence of speech
122           // frames. Update the reliable state.
123           reliable_state_ = preliminary_state_;
124         } else if (num_adjacent_speech_frames_ > 0) {
125           // First non-speech frame after a too short sequence of speech frames.
126           // Reset to the last reliable state.
127           preliminary_state_ = reliable_state_;
128         }
129       }
130       num_adjacent_speech_frames_ = 0;
131     } else {
132       // Speech frame observed.
133       num_adjacent_speech_frames_++;
134 
135       // Update preliminary level estimate.
136       UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
137                                      preliminary_state_);
138 
139       if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
140         // `preliminary_state_` is now reliable. Update the headroom.
141         headroom_db_ = preliminary_state_.headroom_db;
142       }
143     }
144     DumpDebugData();
145   }
146 
Reset()147   void Reset() override {
148     num_adjacent_speech_frames_ = 0;
149     headroom_db_ = initial_headroom_db_;
150     ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
151     ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
152   }
153 
154  private:
DumpDebugData()155   void DumpDebugData() {
156     apm_data_dumper_->DumpRaw(
157         "agc2_saturation_protector_preliminary_max_peak_dbfs",
158         preliminary_state_.max_peaks_dbfs);
159     apm_data_dumper_->DumpRaw(
160         "agc2_saturation_protector_reliable_max_peak_dbfs",
161         reliable_state_.max_peaks_dbfs);
162   }
163 
164   ApmDataDumper* const apm_data_dumper_;
165   const float initial_headroom_db_;
166   const int adjacent_speech_frames_threshold_;
167   int num_adjacent_speech_frames_;
168   float headroom_db_;
169   SaturationProtectorState preliminary_state_;
170   SaturationProtectorState reliable_state_;
171 };
172 
173 }  // namespace
174 
CreateSaturationProtector(float initial_headroom_db,int adjacent_speech_frames_threshold,ApmDataDumper * apm_data_dumper)175 std::unique_ptr<SaturationProtector> CreateSaturationProtector(
176     float initial_headroom_db,
177     int adjacent_speech_frames_threshold,
178     ApmDataDumper* apm_data_dumper) {
179   return std::make_unique<SaturationProtectorImpl>(
180       initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
181 }
182 
183 }  // namespace webrtc
184