1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/saturation_protector.h"
12
13 #include <memory>
14
15 #include "modules/audio_processing/agc2/agc2_common.h"
16 #include "modules/audio_processing/agc2/saturation_protector_buffer.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/numerics/safe_minmax.h"
20
21 namespace webrtc {
22 namespace {
23
// Length, in milliseconds, of the window over which frame peaks are folded
// into a single maximum before that maximum is pushed into the peak delay
// buffer.
constexpr int kPeakEnveloperSuperFrameLengthMs{400};

// Lower and upper bounds, in dB, to which the headroom is clamped after every
// state update.
constexpr float kMinMarginDb{12.0f};
constexpr float kMaxMarginDb{25.0f};

// Smoothing coefficients of the headroom estimate. `kAttack` is applied when
// the observed peak-to-speech-level difference exceeds the current headroom,
// `kDecay` is applied otherwise.
constexpr float kAttack{0.9988493699365052f};
constexpr float kDecay{0.9997697679981565f};
29
30 // Saturation protector state. Defined outside of `SaturationProtectorImpl` to
31 // implement check-point and restore ops.
32 struct SaturationProtectorState {
operator ==webrtc::__anone0dcfa6d0111::SaturationProtectorState33 bool operator==(const SaturationProtectorState& s) const {
34 return headroom_db == s.headroom_db &&
35 peak_delay_buffer == s.peak_delay_buffer &&
36 max_peaks_dbfs == s.max_peaks_dbfs &&
37 time_since_push_ms == s.time_since_push_ms;
38 }
operator !=webrtc::__anone0dcfa6d0111::SaturationProtectorState39 inline bool operator!=(const SaturationProtectorState& s) const {
40 return !(*this == s);
41 }
42
43 float headroom_db;
44 SaturationProtectorBuffer peak_delay_buffer;
45 float max_peaks_dbfs;
46 int time_since_push_ms; // Time since the last ring buffer push operation.
47 };
48
49 // Resets the saturation protector state.
ResetSaturationProtectorState(float initial_headroom_db,SaturationProtectorState & state)50 void ResetSaturationProtectorState(float initial_headroom_db,
51 SaturationProtectorState& state) {
52 state.headroom_db = initial_headroom_db;
53 state.peak_delay_buffer.Reset();
54 state.max_peaks_dbfs = kMinLevelDbfs;
55 state.time_since_push_ms = 0;
56 }
57
58 // Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
59 // and the peak level `peak_dbfs` for an observed frame. `state` must not be
60 // modified without calling this function.
UpdateSaturationProtectorState(float peak_dbfs,float speech_level_dbfs,SaturationProtectorState & state)61 void UpdateSaturationProtectorState(float peak_dbfs,
62 float speech_level_dbfs,
63 SaturationProtectorState& state) {
64 // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
65 state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
66 state.time_since_push_ms += kFrameDurationMs;
67 if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
68 // Push `max_peaks_dbfs` back into the ring buffer.
69 state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
70 // Reset.
71 state.max_peaks_dbfs = kMinLevelDbfs;
72 state.time_since_push_ms = 0;
73 }
74
75 // Update the headroom by comparing the estimated speech level and the delayed
76 // max speech peak.
77 const float delayed_peak_dbfs =
78 state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
79 const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
80 if (difference_db > state.headroom_db) {
81 // Attack.
82 state.headroom_db =
83 state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
84 } else {
85 // Decay.
86 state.headroom_db =
87 state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
88 }
89
90 state.headroom_db =
91 rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
92 }
93
94 // Saturation protector which recommends a headroom based on the recent peaks.
95 class SaturationProtectorImpl : public SaturationProtector {
96 public:
SaturationProtectorImpl(float initial_headroom_db,int adjacent_speech_frames_threshold,ApmDataDumper * apm_data_dumper)97 explicit SaturationProtectorImpl(float initial_headroom_db,
98 int adjacent_speech_frames_threshold,
99 ApmDataDumper* apm_data_dumper)
100 : apm_data_dumper_(apm_data_dumper),
101 initial_headroom_db_(initial_headroom_db),
102 adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
103 Reset();
104 }
105 SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
106 SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
107 ~SaturationProtectorImpl() = default;
108
HeadroomDb()109 float HeadroomDb() override { return headroom_db_; }
110
Analyze(float speech_probability,float peak_dbfs,float speech_level_dbfs)111 void Analyze(float speech_probability,
112 float peak_dbfs,
113 float speech_level_dbfs) override {
114 if (speech_probability < kVadConfidenceThreshold) {
115 // Not a speech frame.
116 if (adjacent_speech_frames_threshold_ > 1) {
117 // When two or more adjacent speech frames are required in order to
118 // update the state, we need to decide whether to discard or confirm the
119 // updates based on the speech sequence length.
120 if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
121 // First non-speech frame after a long enough sequence of speech
122 // frames. Update the reliable state.
123 reliable_state_ = preliminary_state_;
124 } else if (num_adjacent_speech_frames_ > 0) {
125 // First non-speech frame after a too short sequence of speech frames.
126 // Reset to the last reliable state.
127 preliminary_state_ = reliable_state_;
128 }
129 }
130 num_adjacent_speech_frames_ = 0;
131 } else {
132 // Speech frame observed.
133 num_adjacent_speech_frames_++;
134
135 // Update preliminary level estimate.
136 UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
137 preliminary_state_);
138
139 if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
140 // `preliminary_state_` is now reliable. Update the headroom.
141 headroom_db_ = preliminary_state_.headroom_db;
142 }
143 }
144 DumpDebugData();
145 }
146
Reset()147 void Reset() override {
148 num_adjacent_speech_frames_ = 0;
149 headroom_db_ = initial_headroom_db_;
150 ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
151 ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
152 }
153
154 private:
DumpDebugData()155 void DumpDebugData() {
156 apm_data_dumper_->DumpRaw(
157 "agc2_saturation_protector_preliminary_max_peak_dbfs",
158 preliminary_state_.max_peaks_dbfs);
159 apm_data_dumper_->DumpRaw(
160 "agc2_saturation_protector_reliable_max_peak_dbfs",
161 reliable_state_.max_peaks_dbfs);
162 }
163
164 ApmDataDumper* const apm_data_dumper_;
165 const float initial_headroom_db_;
166 const int adjacent_speech_frames_threshold_;
167 int num_adjacent_speech_frames_;
168 float headroom_db_;
169 SaturationProtectorState preliminary_state_;
170 SaturationProtectorState reliable_state_;
171 };
172
173 } // namespace
174
CreateSaturationProtector(float initial_headroom_db,int adjacent_speech_frames_threshold,ApmDataDumper * apm_data_dumper)175 std::unique_ptr<SaturationProtector> CreateSaturationProtector(
176 float initial_headroom_db,
177 int adjacent_speech_frames_threshold,
178 ApmDataDumper* apm_data_dumper) {
179 return std::make_unique<SaturationProtectorImpl>(
180 initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
181 }
182
183 } // namespace webrtc
184