saturation_protector.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422) - OpenGrok cross reference for /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/saturation_protector.cc

/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc2/saturation_protector.h"

#include <memory>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"

namespace webrtc {
namespace {

constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
constexpr float kMinMarginDb = 12.0f;
constexpr float kMaxMarginDb = 25.0f;
constexpr float kAttack = 0.9988493699365052f;
constexpr float kDecay = 0.9997697679981565f;

// Saturation protector state. Defined outside of `SaturationProtectorImpl` to
// implement check-point and restore ops.
struct SaturationProtectorState {
  bool operator==(const SaturationProtectorState& s) const {
    return headroom_db == s.headroom_db &&
           peak_delay_buffer == s.peak_delay_buffer &&
           max_peaks_dbfs == s.max_peaks_dbfs &&
           time_since_push_ms == s.time_since_push_ms;
  }
  inline bool operator!=(const SaturationProtectorState& s) const {
    return !(*this == s);
  }

  float headroom_db;
  SaturationProtectorBuffer peak_delay_buffer;
  float max_peaks_dbfs;
  int time_since_push_ms;  // Time since the last ring buffer push operation.
};

// Resets the saturation protector state.
void ResetSaturationProtectorState(float initial_headroom_db,
                                   SaturationProtectorState& state) {
  state.headroom_db = initial_headroom_db;
  state.peak_delay_buffer.Reset();
  state.max_peaks_dbfs = kMinLevelDbfs;
  state.time_since_push_ms = 0;
}

// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
// and the peak level `peak_dbfs` for an observed frame. `state` must not be
// modified without calling this function.
void UpdateSaturationProtectorState(float peak_dbfs,
                                    float speech_level_dbfs,
                                    SaturationProtectorState& state) {
  // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
  state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
  state.time_since_push_ms += kFrameDurationMs;
  if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
    // Push `max_peaks_dbfs` back into the ring buffer.
    state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
    // Reset.
    state.max_peaks_dbfs = kMinLevelDbfs;
    state.time_since_push_ms = 0;
  }

  // Update the headroom by comparing the estimated speech level and the delayed
  // max speech peak.
  const float delayed_peak_dbfs =
      state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
  const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
  if (difference_db > state.headroom_db) {
    // Attack.
    state.headroom_db =
        state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
  } else {
    // Decay.
    state.headroom_db =
        state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
  }

  state.headroom_db =
      rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
}

// Saturation protector which recommends a headroom based on the recent peaks.
class SaturationProtectorImpl : public SaturationProtector {
 public:
  explicit SaturationProtectorImpl(float initial_headroom_db,
                                   int adjacent_speech_frames_threshold,
                                   ApmDataDumper* apm_data_dumper)
      : apm_data_dumper_(apm_data_dumper),
        initial_headroom_db_(initial_headroom_db),
        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
    Reset();
  }
  SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
  SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
  ~SaturationProtectorImpl() = default;

  float HeadroomDb() override { return headroom_db_; }

  void Analyze(float speech_probability,
               float peak_dbfs,
               float speech_level_dbfs) override {
    if (speech_probability < kVadConfidenceThreshold) {
      // Not a speech frame.
      if (adjacent_speech_frames_threshold_ > 1) {
        // When two or more adjacent speech frames are required in order to
        // update the state, we need to decide whether to discard or confirm the
        // updates based on the speech sequence length.
        if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
          // First non-speech frame after a long enough sequence of speech
          // frames. Update the reliable state.
          reliable_state_ = preliminary_state_;
        } else if (num_adjacent_speech_frames_ > 0) {
          // First non-speech frame after a too short sequence of speech frames.
          // Reset to the last reliable state.
          preliminary_state_ = reliable_state_;
        }
      }
      num_adjacent_speech_frames_ = 0;
    } else {
      // Speech frame observed.
      num_adjacent_speech_frames_++;

      // Update preliminary level estimate.
      UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
                                     preliminary_state_);

      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
        // `preliminary_state_` is now reliable. Update the headroom.
        headroom_db_ = preliminary_state_.headroom_db;
      }
    }
    DumpDebugData();
  }

  void Reset() override {
    num_adjacent_speech_frames_ = 0;
    headroom_db_ = initial_headroom_db_;
    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
  }

 private:
  void DumpDebugData() {
    apm_data_dumper_->DumpRaw(
        "agc2_saturation_protector_preliminary_max_peak_dbfs",
        preliminary_state_.max_peaks_dbfs);
    apm_data_dumper_->DumpRaw(
        "agc2_saturation_protector_reliable_max_peak_dbfs",
        reliable_state_.max_peaks_dbfs);
  }

  ApmDataDumper* const apm_data_dumper_;
  const float initial_headroom_db_;
  const int adjacent_speech_frames_threshold_;
  int num_adjacent_speech_frames_;
  float headroom_db_;
  SaturationProtectorState preliminary_state_;
  SaturationProtectorState reliable_state_;
};

}  // namespace

std::unique_ptr<SaturationProtector> CreateSaturationProtector(
    float initial_headroom_db,
    int adjacent_speech_frames_threshold,
    ApmDataDumper* apm_data_dumper) {
  return std::make_unique<SaturationProtectorImpl>(
      initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
}

}  // namespace webrtc