xref: /aosp_15_r20/external/webrtc/modules/audio_processing/transient/transient_suppressor.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
12 #define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
13 
14 #include <cstddef>
15 
16 namespace webrtc {
17 
// Interface for a component that detects transients in an audio stream and
// suppresses them using a simple restoration algorithm that attenuates
// unexpected spikes in the spectrum.
class TransientSuppressor {
 public:
  // Type of VAD used by the caller to compute the `voice_probability` argument
  // of `Suppress()`.
  enum class VadMode {
    // By default, `TransientSuppressor` assumes that `voice_probability` is
    // computed by `AgcManagerDirect`.
    kDefault = 0,
    // Use this mode when `TransientSuppressor` must assume that
    // `voice_probability` is computed by the RNN VAD.
    kRnnVad,
    // Use this mode to let `TransientSuppressor::Suppress()` ignore
    // `voice_probability` and behave as if voice information is unavailable
    // (regardless of the passed value).
    kNoVad,
  };

  // Virtual so that implementations are destroyed correctly when deleted
  // through a `TransientSuppressor` pointer.
  virtual ~TransientSuppressor() = default;

  // Configures the suppressor for a stream with the given sample rate and
  // channel count.
  // NOTE(review): `detector_rate_hz` is presumably the sample rate of the
  // `detection_data` passed to `Suppress()`; this interface does not state
  // it - confirm against the implementation.
  virtual void Initialize(int sample_rate_hz,
                          int detector_rate_hz,
                          int num_channels) = 0;

  // Processes a `data` chunk in place, and returns it with keystrokes
  // suppressed from it. The samples are floats assumed to be int16 ranged. If
  // there is more than one channel, the chunks are concatenated one after the
  // other in `data`.
  // `data_length` and `num_channels` must be equal to the values the
  // implementation is configured with (its `data_length_` and `num_channels_`
  // members, which are not declared in this interface).
  // A sub-band, ideally the higher, can be used as `detection_data`. If it is
  // nullptr, `data` is used for the detection too. The `detection_data` is
  // always assumed mono.
  // If a reference signal (e.g. keyboard microphone) is available, it can be
  // passed in as `reference_data`. It is assumed mono and must have the same
  // length as `data`. nullptr is accepted if unavailable.
  // This suppressor performs better if voice information is available.
  // `voice_probability` is the probability of voice being present in this chunk
  // of audio. If voice information is not available, `voice_probability` must
  // always be set to 1.
  // `key_pressed` determines if a key was pressed on this audio chunk.
  // Returns a delayed version of `voice_probability` according to the
  // algorithmic delay introduced by this method. In this way, the modified
  // `data` and the returned voice probability will be temporally aligned.
  virtual float Suppress(float* data,
                         size_t data_length,
                         int num_channels,
                         const float* detection_data,
                         size_t detection_length,
                         const float* reference_data,
                         size_t reference_length,
                         float voice_probability,
                         bool key_pressed) = 0;
};
72 
73 }  // namespace webrtc
74 
75 #endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
76