xref: /aosp_15_r20/external/webrtc/modules/audio_coding/neteq/time_stretch.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
12 #define MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
13 
14 #include <string.h>  // memset, size_t
15 
16 #include "modules/audio_coding/neteq/audio_multi_vector.h"
17 
18 namespace webrtc {
19 
20 // Forward declarations.
21 class BackgroundNoise;
22 
23 // This is the base class for Accelerate and PreemptiveExpand. This class
24 // cannot be instantiated, but must be used through either of the derived
25 // classes.
26 class TimeStretch {
27  public:
28   enum ReturnCodes {
29     kSuccess = 0,
30     kSuccessLowEnergy = 1,
31     kNoStretch = 2,
32     kError = -1
33   };
34 
TimeStretch(int sample_rate_hz,size_t num_channels,const BackgroundNoise & background_noise)35   TimeStretch(int sample_rate_hz,
36               size_t num_channels,
37               const BackgroundNoise& background_noise)
38       : sample_rate_hz_(sample_rate_hz),
39         fs_mult_(sample_rate_hz / 8000),
40         num_channels_(num_channels),
41         background_noise_(background_noise),
42         max_input_value_(0) {
43     RTC_DCHECK(sample_rate_hz_ == 8000 || sample_rate_hz_ == 16000 ||
44                sample_rate_hz_ == 32000 || sample_rate_hz_ == 48000);
45     RTC_DCHECK_GT(num_channels_, 0);
46     memset(auto_correlation_, 0, sizeof(auto_correlation_));
47   }
48 
~TimeStretch()49   virtual ~TimeStretch() {}
50 
51   TimeStretch(const TimeStretch&) = delete;
52   TimeStretch& operator=(const TimeStretch&) = delete;
53 
54   // This method performs the processing common to both Accelerate and
55   // PreemptiveExpand.
56   ReturnCodes Process(const int16_t* input,
57                       size_t input_len,
58                       bool fast_mode,
59                       AudioMultiVector* output,
60                       size_t* length_change_samples);
61 
62  protected:
63   // Sets the parameters `best_correlation` and `peak_index` to suitable
64   // values when the signal contains no active speech. This method must be
65   // implemented by the sub-classes.
66   virtual void SetParametersForPassiveSpeech(size_t input_length,
67                                              int16_t* best_correlation,
68                                              size_t* peak_index) const = 0;
69 
70   // Checks the criteria for performing the time-stretching operation and,
71   // if possible, performs the time-stretching. This method must be implemented
72   // by the sub-classes.
73   virtual ReturnCodes CheckCriteriaAndStretch(
74       const int16_t* input,
75       size_t input_length,
76       size_t peak_index,
77       int16_t best_correlation,
78       bool active_speech,
79       bool fast_mode,
80       AudioMultiVector* output) const = 0;
81 
82   static const size_t kCorrelationLen = 50;
83   static const size_t kLogCorrelationLen = 6;  // >= log2(kCorrelationLen).
84   static const size_t kMinLag = 10;
85   static const size_t kMaxLag = 60;
86   static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
87   static const int kCorrelationThreshold = 14746;  // 0.9 in Q14.
88   static constexpr size_t kRefChannel = 0;  // First channel is reference.
89 
90   const int sample_rate_hz_;
91   const int fs_mult_;  // Sample rate multiplier = sample_rate_hz_ / 8000.
92   const size_t num_channels_;
93   const BackgroundNoise& background_noise_;
94   int16_t max_input_value_;
95   int16_t downsampled_input_[kDownsampledLen];
96   // Adding 1 to the size of `auto_correlation_` because of how it is used
97   // by the peak-detection algorithm.
98   int16_t auto_correlation_[kCorrelationLen + 1];
99 
100  private:
101   // Calculates the auto-correlation of `downsampled_input_` and writes the
102   // result to `auto_correlation_`.
103   void AutoCorrelation();
104 
105   // Performs a simple voice-activity detection based on the input parameters.
106   bool SpeechDetection(int32_t vec1_energy,
107                        int32_t vec2_energy,
108                        size_t peak_index,
109                        int scaling) const;
110 };
111 
112 }  // namespace webrtc
113 #endif  // MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
114