/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_

#include <memory>
#include <vector>

#include "api/array_view.h"
#include "common_audio/resampler/include/push_resampler.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/include/audio_frame_view.h"

namespace webrtc {

// Wraps a single-channel Voice Activity Detector (VAD) which is used to analyze
// the first channel of the input audio frames. Takes care of resampling the
// input frames to match the sample rate of the wrapped VAD and periodically
// resets the VAD.
//
// Instances are not copyable (copy construction and copy assignment are
// deleted).
class VoiceActivityDetectorWrapper {
 public:
  // Single channel VAD interface. Implementations are owned by the wrapper
  // through a `std::unique_ptr<MonoVad>`.
  class MonoVad {
   public:
    virtual ~MonoVad() = default;
    // Returns the sample rate (Hz) required for the input frames analyzed by
    // `Analyze()`.
    virtual int SampleRateHz() const = 0;
    // Resets the internal state.
    virtual void Reset() = 0;
    // Analyzes an audio frame and returns the speech probability.
    // NOTE(review): `frame` is presumably expected at `SampleRateHz()`; the
    // required frame length is not visible in this header - confirm against
    // the implementations.
    virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
  };

  // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
  // `MonoVad::Reset()`; it must be equal to or greater than the duration of two
  // frames. Uses `cpu_features` to instantiate the default VAD.
  // NOTE(review): `sample_rate_hz` is presumably the initial input sample rate,
  // as if passed to `Initialize()` - confirm against the .cc file.
  VoiceActivityDetectorWrapper(int vad_reset_period_ms,
                               const AvailableCpuFeatures& cpu_features,
                               int sample_rate_hz);
  // Ctor. Uses a custom `vad`, of which the wrapper takes ownership.
  // `vad_reset_period_ms` has the same meaning as in the ctor above.
  // NOTE(review): `sample_rate_hz` is presumably the initial input sample rate,
  // as if passed to `Initialize()` - confirm against the .cc file.
  VoiceActivityDetectorWrapper(int vad_reset_period_ms,
                               std::unique_ptr<MonoVad> vad,
                               int sample_rate_hz);

  VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete;
  VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) =
      delete;
  ~VoiceActivityDetectorWrapper();

  // Initializes the VAD wrapper. `sample_rate_hz` is the sample rate (Hz) that
  // the frames passed to subsequent `Analyze()` calls must have.
  void Initialize(int sample_rate_hz);

  // Analyzes the first channel of `frame` and returns the speech probability.
  // `frame` must be a 10 ms frame with the sample rate specified in the last
  // `Initialize()` call.
  float Analyze(AudioFrameView<const float> frame);

 private:
  // VAD reset period expressed in frames; fixed at construction.
  // NOTE(review): presumably derived from `vad_reset_period_ms` - confirm.
  const int vad_reset_period_frames_;
  // NOTE(review): presumably the number of samples per input frame at the
  // current sample rate - confirm against the .cc file.
  int frame_size_;
  // NOTE(review): presumably the number of frames left before the next
  // `MonoVad::Reset()` call - confirm against the .cc file.
  int time_to_vad_reset_;
  // Resampler used to match the sample rate of the wrapped VAD.
  PushResampler<float> resampler_;
  // The wrapped single-channel VAD (owned).
  std::unique_ptr<MonoVad> vad_;
  // NOTE(review): presumably holds the resampled frame handed to `vad_` -
  // confirm against the .cc file.
  std::vector<float> resampled_buffer_;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_