xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/vad_wrapper.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
13 
14 #include <memory>
15 #include <vector>
16 
17 #include "api/array_view.h"
18 #include "common_audio/resampler/include/push_resampler.h"
19 #include "modules/audio_processing/agc2/cpu_features.h"
20 #include "modules/audio_processing/include/audio_frame_view.h"
21 
22 namespace webrtc {
23 
24 // Wraps a single-channel Voice Activity Detector (VAD) which is used to analyze
25 // the first channel of the input audio frames. Takes care of resampling the
26 // input frames to match the sample rate of the wrapped VAD and periodically
27 // resets the VAD.
28 class VoiceActivityDetectorWrapper {
29  public:
30   // Single channel VAD interface.
31   class MonoVad {
32    public:
33     virtual ~MonoVad() = default;
34     // Returns the sample rate (Hz) required for the input frames analyzed by
35     // `Analyze()`.
36     virtual int SampleRateHz() const = 0;
37     // Resets the internal state.
38     virtual void Reset() = 0;
39     // Analyzes an audio frame and returns the speech probability.
40     virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
41   };
42 
43   // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
44   // `MonoVad::Reset()`; it must be equal to or greater than the duration of two
45   // frames. Uses `cpu_features` to instantiate the default VAD.
46   VoiceActivityDetectorWrapper(int vad_reset_period_ms,
47                                const AvailableCpuFeatures& cpu_features,
48                                int sample_rate_hz);
49   // Ctor. Uses a custom `vad`, of which the wrapper takes ownership.
50   VoiceActivityDetectorWrapper(int vad_reset_period_ms,
51                                std::unique_ptr<MonoVad> vad,
52                                int sample_rate_hz);
53 
54   VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete;
55   VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) =
56       delete;
57   ~VoiceActivityDetectorWrapper();
58 
59   // Initializes the VAD wrapper for input frames sampled at `sample_rate_hz`.
60   void Initialize(int sample_rate_hz);
61 
62   // Analyzes the first channel of `frame` and returns the speech probability.
63   // `frame` must be a 10 ms frame with the sample rate specified in the last
64   // `Initialize()` call.
65   float Analyze(AudioFrameView<const float> frame);
66 
67  private:
68   const int vad_reset_period_frames_;  // Presumably the reset period in frames.
69   int frame_size_;
70   int time_to_vad_reset_;
71   PushResampler<float> resampler_;  // Resamples input to the VAD sample rate.
72   std::unique_ptr<MonoVad> vad_;  // The wrapped single-channel VAD.
73   std::vector<float> resampled_buffer_;  // Presumably the resampler output.
74 };
75 
76 }  // namespace webrtc
77 
78 #endif  // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
79