xref: /aosp_15_r20/external/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
12 #define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
13 
14 #include <stddef.h>
15 
16 #include <map>
17 #include <memory>
18 #include <set>
19 #include <string>
20 #include <utility>
21 #include <vector>
22 
23 #include "absl/strings/string_view.h"
24 #include "api/array_view.h"
25 #include "modules/audio_processing/test/conversational_speech/timing.h"
26 #include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
27 #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
28 
29 namespace webrtc {
30 namespace test {
31 namespace conversational_speech {
32 
33 class MultiEndCall {
34  public:
35   struct SpeakingTurn {
36     // Constructor required in order to use std::vector::emplace_back().
SpeakingTurnSpeakingTurn37     SpeakingTurn(absl::string_view new_speaker_name,
38                  absl::string_view new_audiotrack_file_name,
39                  size_t new_begin,
40                  size_t new_end,
41                  int gain)
42         : speaker_name(new_speaker_name),
43           audiotrack_file_name(new_audiotrack_file_name),
44           begin(new_begin),
45           end(new_end),
46           gain(gain) {}
47     std::string speaker_name;
48     std::string audiotrack_file_name;
49     size_t begin;
50     size_t end;
51     int gain;
52   };
53 
54   MultiEndCall(
55       rtc::ArrayView<const Turn> timing,
56       absl::string_view audiotracks_path,
57       std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory);
58   ~MultiEndCall();
59 
60   MultiEndCall(const MultiEndCall&) = delete;
61   MultiEndCall& operator=(const MultiEndCall&) = delete;
62 
speaker_names()63   const std::set<std::string>& speaker_names() const { return speaker_names_; }
64   const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
audiotrack_readers()65   audiotrack_readers() const {
66     return audiotrack_readers_;
67   }
valid()68   bool valid() const { return valid_; }
sample_rate()69   int sample_rate() const { return sample_rate_hz_; }
total_duration_samples()70   size_t total_duration_samples() const { return total_duration_samples_; }
speaking_turns()71   const std::vector<SpeakingTurn>& speaking_turns() const {
72     return speaking_turns_;
73   }
74 
75  private:
76   // Finds unique speaker names.
77   void FindSpeakerNames();
78 
79   // Creates one WavReader instance for each unique audiotrack. It returns false
80   // if the audio tracks do not have the same sample rate or if they are not
81   // mono.
82   bool CreateAudioTrackReaders();
83 
84   // Validates the speaking turns timing information. Accepts cross-talk, but
85   // only up to 2 speakers. Rejects unordered turns and self cross-talk.
86   bool CheckTiming();
87 
88   rtc::ArrayView<const Turn> timing_;
89   std::string audiotracks_path_;
90   std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory_;
91   std::set<std::string> speaker_names_;
92   std::map<std::string, std::unique_ptr<WavReaderInterface>>
93       audiotrack_readers_;
94   bool valid_;
95   int sample_rate_hz_;
96   size_t total_duration_samples_;
97   std::vector<SpeakingTurn> speaking_turns_;
98 };
99 
100 }  // namespace conversational_speech
101 }  // namespace test
102 }  // namespace webrtc
103 
104 #endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
105