1# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
2#
3# Use of this source code is governed by a BSD-style license
4# that can be found in the LICENSE file in the root of the source
5# tree. An additional intellectual property rights grant can be found
6# in the file PATENTS.  All contributing project authors may
7# be found in the AUTHORS file in the root of the source tree.
8"""Imports a filtered subset of the scores and configurations computed
9by apm_quality_assessment.py into a pandas data frame.
10"""
11
12import argparse
13import glob
14import logging
15import os
16import re
17import sys
18
19try:
20    import pandas as pd
21except ImportError:
22    logging.critical('Cannot import the third-party Python package pandas')
23    sys.exit(1)
24
25from . import data_access as data_access
26from . import simulation as sim
27
# Regular expressions that pull the descriptor out of each component of a
# score file path. Every pattern is the prefix reported by the simulator for
# that path level followed by a capturing group holding the rest of the name.
# NOTE(review): the prefixes are concatenated into the patterns verbatim (no
# re.escape), so they are assumed to be free of regex metacharacters.
_SIMULATOR = sim.ApmModuleSimulator
RE_CONFIG_NAME = re.compile(_SIMULATOR.GetPrefixApmConfig() + r'(.+)')
RE_CAPTURE_NAME = re.compile(_SIMULATOR.GetPrefixCapture() + r'(.+)')
RE_RENDER_NAME = re.compile(_SIMULATOR.GetPrefixRender() + r'(.+)')
RE_ECHO_SIM_NAME = re.compile(_SIMULATOR.GetPrefixEchoSimulator() + r'(.+)')
RE_TEST_DATA_GEN_NAME = re.compile(
    _SIMULATOR.GetPrefixTestDataGenerator() + r'(.+)')
RE_TEST_DATA_GEN_PARAMS = re.compile(
    _SIMULATOR.GetPrefixTestDataGeneratorParameters() + r'(.+)')
# The score pattern has two groups: the score name, then a trailing part that
# starts with a literal dot.
RE_SCORE_NAME = re.compile(_SIMULATOR.GetPrefixScore() + r'(.+)(\..+)')
42
43
def _CompileRegexArgument(pattern):
    """Compiles a regular expression given on the command line.

    Used as an argparse `type=` callable. argparse only turns
    ArgumentTypeError, TypeError and ValueError into a usage error, whereas
    re.compile signals a bad pattern with re.error; the conversion below makes
    an invalid pattern end with a regular argparse error message instead of a
    traceback.

    Args:
      pattern: regular expression source string.

    Returns:
      The compiled regular expression object.

    Raises:
      argparse.ArgumentTypeError: if pattern is not a valid regular
        expression.
    """
    try:
        return re.compile(pattern)
    except re.error as e:
        raise argparse.ArgumentTypeError(
            'invalid regular expression %r: %s' % (pattern, e))


def InstanceArgumentsParser():
    """Arguments parser factory.

    The returned parser has a mandatory -o/--output_dir option and six
    optional name filters. Each filter value is compiled into a regular
    expression object while parsing; a filter that is not passed is left to
    the argparse default (None).

    Returns:
      An argparse.ArgumentParser instance.
    """
    parser = argparse.ArgumentParser(
        description=('Override this description in a user script by changing'
                     ' `parser.description` of the returned parser.'))

    parser.add_argument('-o',
                        '--output_dir',
                        required=True,
                        help=('the same base path used with the '
                              'apm_quality_assessment tool'))

    # (short flag, long flag, help text) of the optional name filters, in the
    # order in which they are listed by --help.
    regex_filters = (
        ('-c', '--config_names',
         'regular expression to filter the APM configuration names'),
        ('-i', '--capture_names',
         'regular expression to filter the capture signal names'),
        ('-r', '--render_names',
         'regular expression to filter the render signal names'),
        ('-e', '--echo_simulator_names',
         'regular expression to filter the echo simulator names'),
        ('-t', '--test_data_generators',
         'regular expression to filter the test data generator names'),
        ('-s', '--eval_scores',
         'regular expression to filter the evaluation score names'),
    )
    for short_flag, long_flag, help_text in regex_filters:
        parser.add_argument(short_flag,
                            long_flag,
                            type=_CompileRegexArgument,
                            help=help_text)

    return parser
98
99
def _GetScoreDescriptors(score_filepath):
    """Extracts a score descriptor from the given score file path.

    The path is split on os.sep and its last seven components are matched, in
    order, against RE_CONFIG_NAME, RE_CAPTURE_NAME, RE_RENDER_NAME,
    RE_ECHO_SIM_NAME, RE_TEST_DATA_GEN_NAME, RE_TEST_DATA_GEN_PARAMS and
    RE_SCORE_NAME. The first capturing group of each match is the descriptor.

    Args:
      score_filepath: path to the score file.

    Returns:
      A tuple of strings (APM configuration name, capture audio track name,
      render audio track name, echo simulator name, test data generator name,
      test data generator parameters as string, evaluation score name).

    Raises:
      ValueError: if the path has fewer than seven components or if one of
        them does not match the expected pattern.
    """
    patterns = (
        RE_CONFIG_NAME,
        RE_CAPTURE_NAME,
        RE_RENDER_NAME,
        RE_ECHO_SIM_NAME,
        RE_TEST_DATA_GEN_NAME,
        RE_TEST_DATA_GEN_PARAMS,
        RE_SCORE_NAME,
    )
    fields = score_filepath.split(os.sep)[-len(patterns):]
    if len(fields) != len(patterns):
        raise ValueError(
            'score file path %r has %d components, expected at least %d' %
            (score_filepath, len(fields), len(patterns)))

    descriptors = []
    for field, pattern in zip(fields, patterns):
        match = pattern.match(field)
        # match() returns None on failure; report which component is wrong
        # rather than failing on the attribute access below.
        if match is None:
            raise ValueError(
                'unexpected component %r in score file path %r' %
                (field, score_filepath))
        descriptors.append(match.group(1))
    return tuple(descriptors)
123
124
125def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
126                  test_data_gen_name, score_name, args):
127    """Decides whether excluding a score.
128
129  A set of optional regular expressions in args is used to determine if the
130  score should be excluded (depending on its |*_name| descriptors).
131
132  Args:
133    config_name: APM configuration name.
134    capture_name: capture audio track name.
135    render_name: render audio track name.
136    echo_simulator_name: echo simulator name.
137    test_data_gen_name: test data generator name.
138    score_name: evaluation score name.
139    args: parsed arguments.
140
141  Returns:
142    A boolean.
143  """
144    value_regexpr_pairs = [
145        (config_name, args.config_names),
146        (capture_name, args.capture_names),
147        (render_name, args.render_names),
148        (echo_simulator_name, args.echo_simulator_names),
149        (test_data_gen_name, args.test_data_generators),
150        (score_name, args.eval_scores),
151    ]
152
153    # Score accepted if each value matches the corresponding regular expression.
154    for value, regexpr in value_regexpr_pairs:
155        if regexpr is None:
156            continue
157        if not regexpr.match(value):
158            return True
159
160    return False
161
162
def FindScores(src_path, args):
    """Collects the scores found under a search path into a DataFrame.

    Every path produced by expanding src_path with glob is treated as a score
    file. Its descriptors are extracted from the path, the filters in args are
    applied and, for the scores that are kept, the audio test data paths are
    loaded from the directory of the score file and the score from the file
    itself. Ignored scores are logged at info level.

    Args:
      src_path: Search path pattern.
      args: parsed arguments.

    Returns:
      A DataFrame object with one row per retained score.
    """
    # Metadata entries copied into each row; the columns holding them have
    # the same names.
    metadata_keys = (
        'clean_capture_input_filepath',
        'echo_free_capture_filepath',
        'echo_filepath',
        'render_filepath',
        'capture_filepath',
        'apm_output_filepath',
        'apm_reference_filepath',
    )
    descriptor_columns = (
        'apm_config',
        'capture',
        'render',
        'echo_simulator',
        'test_data_gen',
        'test_data_gen_params',
        'eval_score_name',
    )

    rows = []
    for score_filepath in glob.iglob(src_path):
        # Descriptor fields encoded in the path.
        (config_name, capture_name, render_name, echo_simulator_name,
         test_data_gen_name, test_data_gen_params,
         score_name) = _GetScoreDescriptors(score_filepath)

        # Skip filtered out scores.
        if _ExcludeScore(config_name, capture_name, render_name,
                         echo_simulator_name, test_data_gen_name, score_name,
                         args):
            logging.info('ignored score: %s %s %s %s %s %s', config_name,
                         capture_name, render_name, echo_simulator_name,
                         test_data_gen_name, score_name)
            continue

        # The metadata is looked up in the directory of the score file.
        score_dir = os.path.dirname(score_filepath)
        metadata = data_access.Metadata.LoadAudioTestDataPaths(score_dir)
        score = data_access.ScoreFile.Load(score_filepath)

        # Row layout: metadata paths, descriptor fields, score.
        metadata_values = tuple(metadata[key] for key in metadata_keys)
        rows.append(metadata_values + (
            config_name,
            capture_name,
            render_name,
            echo_simulator_name,
            test_data_gen_name,
            test_data_gen_params,
            score_name,
            score,
        ))

    return pd.DataFrame(data=rows,
                        columns=metadata_keys + descriptor_columns +
                        ('score', ))
232
233
def ConstructSrcPath(args):
    """Builds the search path pattern for the score files.

    The pattern is args.output_dir followed by one path component per
    simulator prefix (APM config, capture, render, echo simulator, test data
    generator, test data generator parameters, score), each with a trailing
    '*' wildcard.

    Args:
      args: parsed arguments; only output_dir is read.

    Returns:
      The search path pattern as a string.
    """
    base_dir = args.output_dir
    simulator = sim.ApmModuleSimulator
    prefixes = (
        simulator.GetPrefixApmConfig(),
        simulator.GetPrefixCapture(),
        simulator.GetPrefixRender(),
        simulator.GetPrefixEchoSimulator(),
        simulator.GetPrefixTestDataGenerator(),
        simulator.GetPrefixTestDataGeneratorParameters(),
        simulator.GetPrefixScore(),
    )
    return os.path.join(base_dir, *(prefix + '*' for prefix in prefixes))
244