/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

#include <cstddef>

namespace webrtc {

// Detects transients in an audio stream and suppresses them using a simple
// restoration algorithm that attenuates unexpected spikes in the spectrum.
//
// This is a pure interface: both `Initialize()` and `Suppress()` must be
// provided by a concrete implementation.
class TransientSuppressor {
 public:
  // Type of VAD used by the caller to compute the `voice_probability` argument
  // of `Suppress()`.
  enum class VadMode {
    // By default, `TransientSuppressor` assumes that `voice_probability` is
    // computed by `AgcManagerDirect`.
    kDefault = 0,
    // Use this mode when `TransientSuppressor` must assume that
    // `voice_probability` is computed by the RNN VAD.
    kRnnVad,
    // Use this mode to let `TransientSuppressor::Suppress()` ignore
    // `voice_probability` and behave as if voice information is unavailable
    // (regardless of the passed value).
    kNoVad,
  };

  // Virtual so that implementations can be destroyed through a pointer to
  // this interface.
  virtual ~TransientSuppressor() = default;

  // Prepares the suppressor for processing with the given configuration.
  // NOTE(review): judging by the parameter names, `sample_rate_hz` is the rate
  // of the processed audio, `detector_rate_hz` the rate of the detection
  // signal and `num_channels` the channel count of `data` passed to
  // `Suppress()`; the exact contract is defined by the implementation -
  // confirm there.
  virtual void Initialize(int sample_rate_hz,
                          int detector_rate_hz,
                          int num_channels) = 0;

  // Processes a `data` chunk, and returns it with keystrokes suppressed from
  // it. The float format is assumed to be int16 ranged. If there is more than
  // one channel, the chunks are concatenated one after the other in `data`.
  // `data_length` must be equal to `data_length_`.
  // `num_channels` must be equal to `num_channels_`.
  // A sub-band, ideally the higher, can be used as `detection_data`. If it is
  // nullptr, `data` is used for the detection too. The `detection_data` is
  // always assumed mono.
  // If a reference signal (e.g. keyboard microphone) is available, it can be
  // passed in as `reference_data`. It is assumed mono and must have the same
  // length as `data`. nullptr is accepted if unavailable.
  // This suppressor performs better if voice information is available.
  // `voice_probability` is the probability of voice being present in this chunk
  // of audio. If voice information is not available, `voice_probability` must
  // always be set to 1.
  // `key_pressed` determines if a key was pressed on this audio chunk.
  // Returns a delayed version of `voice_probability` according to the
  // algorithmic delay introduced by this method. In this way, the modified
  // `data` and the returned voice probability will be temporally aligned.
  virtual float Suppress(float* data,
                         size_t data_length,
                         int num_channels,
                         const float* detection_data,
                         size_t detection_length,
                         const float* reference_data,
                         size_t reference_length,
                         float voice_probability,
                         bool key_pressed) = 0;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_