xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_
13 
14 #include <vector>
15 
16 #include "api/array_view.h"
17 #include "modules/audio_processing/agc2/biquad_filter.h"
18 #include "modules/audio_processing/agc2/rnn_vad/common.h"
19 #include "modules/audio_processing/agc2/rnn_vad/pitch_search.h"
20 #include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h"
21 #include "modules/audio_processing/agc2/rnn_vad/spectral_features.h"
22 
23 namespace webrtc {
24 namespace rnn_vad {
25 
26 // Feature extractor to feed the VAD RNN.
   //
   // Instances cannot be copied: the copy constructor and the copy assignment
   // operator are deleted. This header only declares the member functions;
   // their definitions live elsewhere.
27 class FeaturesExtractor {
28  public:
     // Builds an extractor from `cpu_features`. The constructor is `explicit`,
     // hence an `AvailableCpuFeatures` object is never implicitly converted
     // into a `FeaturesExtractor`.
     // NOTE(review): `cpu_features` is presumably used to select optimized
     // code paths in the owned components - confirm in the implementation.
29   explicit FeaturesExtractor(const AvailableCpuFeatures& cpu_features);
30   FeaturesExtractor(const FeaturesExtractor&) = delete;
31   FeaturesExtractor& operator=(const FeaturesExtractor&) = delete;
32   ~FeaturesExtractor();
     // NOTE(review): presumably brings the internal state (filter, buffers)
     // back to its initial condition - confirm in the implementation.
33   void Reset();
34   // Analyzes the samples, computes the feature vector and returns true if
35   // silence is detected (false if not). When silence is detected,
36   // `feature_vector` is partially written and therefore must not be used to
37   // feed the VAD RNN.
     //
     // `samples` is a read-only, fixed-size view of `kFrameSize10ms24kHz`
     // floats; `feature_vector` is a writable, fixed-size view of
     // `kFeatureVectorSize` floats that receives the output.
     // NOTE(review): the constant name suggests one 10 ms frame sampled at
     // 24 kHz - confirm against the definition in common.h.
38   bool CheckSilenceComputeFeatures(
39       rtc::ArrayView<const float, kFrameSize10ms24kHz> samples,
40       rtc::ArrayView<float, kFeatureVectorSize> feature_vector);
41 
42  private:
     // Non-static data members are initialized in declaration order.
     // NOTE(review): the `*_view_` members look like views over the buffers
     // declared right before them; if so, the order below must be preserved -
     // confirm in the constructor.

     // Constant for the whole lifetime of the object.
     // NOTE(review): presumably gates the use of `hpf_` - confirm.
43   const bool use_high_pass_filter_;
44   // TODO(bugs.webrtc.org/7494): Remove HPF depending on how AGC2 is used in APM
45   // and on whether an HPF is already used as pre-processing step in APM.
46   BiQuadFilter hpf_;
     // NOTE(review): presumably the 24 kHz sample history used for the pitch
     // analysis - confirm the meaning of the template arguments in
     // sequence_buffer.h.
47   SequenceBuffer<float, kBufSize24kHz, kFrameSize10ms24kHz, kFrameSize20ms24kHz>
48       pitch_buf_24kHz_;
     // Read-only, fixed-size view of `kBufSize24kHz` floats.
     // NOTE(review): presumably refers to `pitch_buf_24kHz_` - confirm.
49   rtc::ArrayView<const float, kBufSize24kHz> pitch_buf_24kHz_view_;
     // NOTE(review): presumably holds the linear prediction residual - confirm.
50   std::vector<float> lp_residual_;
     // Writable, fixed-size view of `kBufSize24kHz` floats.
     // NOTE(review): presumably refers to the storage of `lp_residual_`, which
     // would then have to keep that size and never reallocate - confirm.
51   rtc::ArrayView<float, kBufSize24kHz> lp_residual_view_;
52   PitchEstimator pitch_estimator_;
     // Read-only, fixed-size view of `kFrameSize20ms24kHz` floats.
     // NOTE(review): presumably refers to the most recent samples stored in
     // `pitch_buf_24kHz_` - confirm.
53   rtc::ArrayView<const float, kFrameSize20ms24kHz> reference_frame_view_;
54   SpectralFeaturesExtractor spectral_features_extractor_;
     // No default member initializer is given here.
     // NOTE(review): make sure the value is always assigned before it is read -
     // confirm in the implementation.
55   int pitch_period_48kHz_;
56 };
57 
58 }  // namespace rnn_vad
59 }  // namespace webrtc
60 
61 #endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_
62