xref: /aosp_15_r20/external/webrtc/modules/audio_processing/transient/transient_suppression_test.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include <memory>
16 #include <string>
17 #include <vector>
18 
19 #include "absl/flags/flag.h"
20 #include "absl/flags/parse.h"
21 #include "common_audio/include/audio_util.h"
22 #include "modules/audio_processing/agc/agc.h"
23 #include "modules/audio_processing/transient/transient_suppressor.h"
24 #include "modules/audio_processing/transient/transient_suppressor_impl.h"
25 #include "test/gtest.h"
26 #include "test/testsupport/file_utils.h"
27 
// Command-line flags of the tool. The three file-name flags select the PCM
// inputs; only in_file_name is mandatory (void_main() asserts that it can be
// opened), the detection and reference files are used only when a name is
// given.
28 ABSL_FLAG(std::string, in_file_name, "", "PCM file that contains the signal.");
29 ABSL_FLAG(std::string,
30           detection_file_name,
31           "",
32           "PCM file that contains the detection signal.");
33 ABSL_FLAG(std::string,
34           reference_file_name,
35           "",
36           "PCM file that contains the reference signal.");
37 
// chunk_size_ms, sample_rate_hz and num_channels are checked to be strictly
// positive in main() before any processing starts.
38 ABSL_FLAG(int,
39           chunk_size_ms,
40           10,
41           "Time between each chunk of samples in milliseconds.");
42 
43 ABSL_FLAG(int,
44           sample_rate_hz,
45           16000,
46           "Sampling frequency of the signal in Hertz.");
// The default of 0 is a sentinel: void_main() substitutes sample_rate_hz for
// the detection rate when this flag is left at 0.
47 ABSL_FLAG(int,
48           detection_rate_hz,
49           0,
50           "Sampling frequency of the detection signal in Hertz.");
51 
52 ABSL_FLAG(int, num_channels, 1, "Number of channels.");
53 
54 namespace webrtc {
55 
// Usage text printed by main() when unexpected positional arguments are left
// over after flag parsing. It only mentions options that exist as flags in
// this file.
const char kUsage[] =
    "\nDetects and suppresses transients from file.\n\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz, and the reference\n"
    "signal from the reference_file_name with sample_rate_hz, divides them\n"
    "into chunk_size_ms blocks, computes the voice probability of each block\n"
    "and feeds it to the transient suppressor. If detection_rate_hz is 0 the\n"
    "sample_rate_hz is used for the detection signal as well. The processed\n"
    "signal is written to suppressed_keystrokes.pcm in the test output\n"
    "directory.\n\n";
66 
67 // Read next buffers from the test files (signed 16-bit host-endian PCM
68 // format). audio_buffer has int16 samples, detection_buffer has float samples
69 // with range [-32768,32767], and reference_buffer has float samples with range
70 // [-1,1]. Return true iff all the buffers were filled completely.
ReadBuffers(FILE * in_file,size_t audio_buffer_size,int num_channels,int16_t * audio_buffer,FILE * detection_file,size_t detection_buffer_size,float * detection_buffer,FILE * reference_file,float * reference_buffer)71 bool ReadBuffers(FILE* in_file,
72                  size_t audio_buffer_size,
73                  int num_channels,
74                  int16_t* audio_buffer,
75                  FILE* detection_file,
76                  size_t detection_buffer_size,
77                  float* detection_buffer,
78                  FILE* reference_file,
79                  float* reference_buffer) {
80   std::unique_ptr<int16_t[]> tmpbuf;
81   int16_t* read_ptr = audio_buffer;
82   if (num_channels > 1) {
83     tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
84     read_ptr = tmpbuf.get();
85   }
86   if (fread(read_ptr, sizeof(*read_ptr), num_channels * audio_buffer_size,
87             in_file) != num_channels * audio_buffer_size) {
88     return false;
89   }
90   // De-interleave.
91   if (num_channels > 1) {
92     for (int i = 0; i < num_channels; ++i) {
93       for (size_t j = 0; j < audio_buffer_size; ++j) {
94         audio_buffer[i * audio_buffer_size + j] =
95             read_ptr[i + j * num_channels];
96       }
97     }
98   }
99   if (detection_file) {
100     std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
101     if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
102               detection_file) != detection_buffer_size)
103       return false;
104     for (size_t i = 0; i < detection_buffer_size; ++i)
105       detection_buffer[i] = ibuf[i];
106   }
107   if (reference_file) {
108     std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
109     if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) !=
110         audio_buffer_size)
111       return false;
112     S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
113   }
114   return true;
115 }
116 
117 // Write a number of samples to an open signed 16-bit host-endian PCM file.
WritePCM(FILE * f,size_t num_samples,int num_channels,const float * buffer)118 static void WritePCM(FILE* f,
119                      size_t num_samples,
120                      int num_channels,
121                      const float* buffer) {
122   std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
123   // Interleave.
124   for (int i = 0; i < num_channels; ++i) {
125     for (size_t j = 0; j < num_samples; ++j) {
126       ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
127     }
128   }
129   fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
130 }
131 
132 // This application tests the transient suppression by providing a processed
133 // PCM file, which has to be listened to in order to evaluate the
134 // performance.
135 // It gets an audio file, and its voice gain information, and the suppressor
136 // process it giving the output file "suppressed_keystrokes.pcm".
void_main()137 void void_main() {
138   // TODO(aluebs): Remove all FileWrappers.
139   // Prepare the input file.
140   FILE* in_file = fopen(absl::GetFlag(FLAGS_in_file_name).c_str(), "rb");
141   ASSERT_TRUE(in_file != NULL);
142 
143   // Prepare the detection file.
144   FILE* detection_file = NULL;
145   if (!absl::GetFlag(FLAGS_detection_file_name).empty()) {
146     detection_file =
147         fopen(absl::GetFlag(FLAGS_detection_file_name).c_str(), "rb");
148   }
149 
150   // Prepare the reference file.
151   FILE* reference_file = NULL;
152   if (!absl::GetFlag(FLAGS_reference_file_name).empty()) {
153     reference_file =
154         fopen(absl::GetFlag(FLAGS_reference_file_name).c_str(), "rb");
155   }
156 
157   // Prepare the output file.
158   std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
159   FILE* out_file = fopen(out_file_name.c_str(), "wb");
160   ASSERT_TRUE(out_file != NULL);
161 
162   int detection_rate_hz = absl::GetFlag(FLAGS_detection_rate_hz);
163   if (detection_rate_hz == 0) {
164     detection_rate_hz = absl::GetFlag(FLAGS_sample_rate_hz);
165   }
166 
167   Agc agc;
168 
169   TransientSuppressorImpl suppressor(TransientSuppressor::VadMode::kDefault,
170                                      absl::GetFlag(FLAGS_sample_rate_hz),
171                                      detection_rate_hz,
172                                      absl::GetFlag(FLAGS_num_channels));
173 
174   const size_t audio_buffer_size = absl::GetFlag(FLAGS_chunk_size_ms) *
175                                    absl::GetFlag(FLAGS_sample_rate_hz) / 1000;
176   const size_t detection_buffer_size =
177       absl::GetFlag(FLAGS_chunk_size_ms) * detection_rate_hz / 1000;
178 
179   // int16 and float variants of the same data.
180   std::unique_ptr<int16_t[]> audio_buffer_i(
181       new int16_t[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
182   std::unique_ptr<float[]> audio_buffer_f(
183       new float[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
184 
185   std::unique_ptr<float[]> detection_buffer, reference_buffer;
186 
187   if (detection_file)
188     detection_buffer.reset(new float[detection_buffer_size]);
189   if (reference_file)
190     reference_buffer.reset(new float[audio_buffer_size]);
191 
192   while (ReadBuffers(
193       in_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
194       audio_buffer_i.get(), detection_file, detection_buffer_size,
195       detection_buffer.get(), reference_file, reference_buffer.get())) {
196     agc.Process({audio_buffer_i.get(), audio_buffer_size});
197 
198     for (size_t i = 0;
199          i < absl::GetFlag(FLAGS_num_channels) * audio_buffer_size; ++i) {
200       audio_buffer_f[i] = audio_buffer_i[i];
201     }
202 
203     suppressor.Suppress(audio_buffer_f.get(), audio_buffer_size,
204                         absl::GetFlag(FLAGS_num_channels),
205                         detection_buffer.get(), detection_buffer_size,
206                         reference_buffer.get(), audio_buffer_size,
207                         agc.voice_probability(), true);
208 
209     // Write result to out file.
210     WritePCM(out_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
211              audio_buffer_f.get());
212   }
213 
214   fclose(in_file);
215   if (detection_file) {
216     fclose(detection_file);
217   }
218   if (reference_file) {
219     fclose(reference_file);
220   }
221   fclose(out_file);
222 }
223 
224 }  // namespace webrtc
225 
main(int argc,char * argv[])226 int main(int argc, char* argv[]) {
227   std::vector<char*> args = absl::ParseCommandLine(argc, argv);
228   if (args.size() != 1) {
229     printf("%s", webrtc::kUsage);
230     return 1;
231   }
232   RTC_CHECK_GT(absl::GetFlag(FLAGS_chunk_size_ms), 0);
233   RTC_CHECK_GT(absl::GetFlag(FLAGS_sample_rate_hz), 0);
234   RTC_CHECK_GT(absl::GetFlag(FLAGS_num_channels), 0);
235 
236   webrtc::void_main();
237   return 0;
238 }
239