1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
12
13 #include <array>
14
15 #include "modules/audio_processing/agc2/rnn_vad/lp_residual.h"
16 #include "rtc_base/checks.h"
17
18 namespace webrtc {
19 namespace rnn_vad {
20 namespace {
21
22 // Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`.
23 constexpr BiQuadFilter::Config kHpfConfig24k{
24 {0.99446179f, -1.98892358f, 0.99446179f},
25 {-1.98889291f, 0.98895425f}};
26
27 } // namespace
28
FeaturesExtractor(const AvailableCpuFeatures & cpu_features)29 FeaturesExtractor::FeaturesExtractor(const AvailableCpuFeatures& cpu_features)
30 : use_high_pass_filter_(false),
31 hpf_(kHpfConfig24k),
32 pitch_buf_24kHz_(),
33 pitch_buf_24kHz_view_(pitch_buf_24kHz_.GetBufferView()),
34 lp_residual_(kBufSize24kHz),
35 lp_residual_view_(lp_residual_.data(), kBufSize24kHz),
36 pitch_estimator_(cpu_features),
37 reference_frame_view_(pitch_buf_24kHz_.GetMostRecentValuesView()) {
38 RTC_DCHECK_EQ(kBufSize24kHz, lp_residual_.size());
39 Reset();
40 }
41
42 FeaturesExtractor::~FeaturesExtractor() = default;
43
Reset()44 void FeaturesExtractor::Reset() {
45 pitch_buf_24kHz_.Reset();
46 spectral_features_extractor_.Reset();
47 if (use_high_pass_filter_) {
48 hpf_.Reset();
49 }
50 }
51
CheckSilenceComputeFeatures(rtc::ArrayView<const float,kFrameSize10ms24kHz> samples,rtc::ArrayView<float,kFeatureVectorSize> feature_vector)52 bool FeaturesExtractor::CheckSilenceComputeFeatures(
53 rtc::ArrayView<const float, kFrameSize10ms24kHz> samples,
54 rtc::ArrayView<float, kFeatureVectorSize> feature_vector) {
55 // Pre-processing.
56 if (use_high_pass_filter_) {
57 std::array<float, kFrameSize10ms24kHz> samples_filtered;
58 hpf_.Process(samples, samples_filtered);
59 // Feed buffer with the pre-processed version of `samples`.
60 pitch_buf_24kHz_.Push(samples_filtered);
61 } else {
62 // Feed buffer with `samples`.
63 pitch_buf_24kHz_.Push(samples);
64 }
65 // Extract the LP residual.
66 float lpc_coeffs[kNumLpcCoefficients];
67 ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs);
68 ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_);
69 // Estimate pitch on the LP-residual and write the normalized pitch period
70 // into the output vector (normalization based on training data stats).
71 pitch_period_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_);
72 feature_vector[kFeatureVectorSize - 2] = 0.01f * (pitch_period_48kHz_ - 300);
73 // Extract lagged frames (according to the estimated pitch period).
74 RTC_DCHECK_LE(pitch_period_48kHz_ / 2, kMaxPitch24kHz);
75 auto lagged_frame = pitch_buf_24kHz_view_.subview(
76 kMaxPitch24kHz - pitch_period_48kHz_ / 2, kFrameSize20ms24kHz);
77 // Analyze reference and lagged frames checking if silence has been detected
78 // and write the feature vector.
79 return spectral_features_extractor_.CheckSilenceComputeFeatures(
80 reference_frame_view_, {lagged_frame.data(), kFrameSize20ms24kHz},
81 {feature_vector.data() + kNumLowerBands, kNumBands - kNumLowerBands},
82 {feature_vector.data(), kNumLowerBands},
83 {feature_vector.data() + kNumBands, kNumLowerBands},
84 {feature_vector.data() + kNumBands + kNumLowerBands, kNumLowerBands},
85 {feature_vector.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands},
86 &feature_vector[kFeatureVectorSize - 1]);
87 }
88
89 } // namespace rnn_vad
90 } // namespace webrtc
91