1#!/usr/bin/env python
2# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3#
4# Use of this source code is governed by a BSD-style license
5# that can be found in the LICENSE file in the root of the source
6# tree. An additional intellectual property rights grant can be found
7# in the file PATENTS.  All contributing project authors may
8# be found in the AUTHORS file in the root of the source tree.
9"""Shows boxplots of given score for different values of selected
10parameters. Can be used to compare scores by audioproc_f flag.
11
12Usage: apm_quality_assessment_boxplot.py -o /path/to/output
13                                         -v polqa
14                                         -n /path/to/dir/with/apm_configs
15                                         -z audioproc_f_arg1 [arg2 ...]
16
17Arguments --config_names, --render_names, --echo_simulator_names,
18--test_data_generators, --eval_scores can be used to filter the data
19used for plotting.
20"""
21
22import collections
23import logging
24import matplotlib.pyplot as plt
25import os
26
27import quality_assessment.data_access as data_access
28import quality_assessment.collect_data as collect_data
29
30
def InstanceArgumentsParser():
    """Arguments parser factory.

    Extends the parser created by `collect_data.InstanceArgumentsParser()`
    with the options that are specific to the boxplot tool.

    Returns:
      The parser obtained from `collect_data`, with its description replaced
      and the following arguments added:
        -v/--eval_score (required): name of the score to plot.
        -n/--config_dir (default 'apm_configs'): folder with the APM
          configuration files.
        -z/--params_to_plot (required, one or more values): audioproc_f
          parameter names, without the leading dash, to group scores by.
    """
    parser = collect_data.InstanceArgumentsParser()
    # Adjacent string literals are joined verbatim, so every fragment that is
    # followed by another one must end with its own separating space.
    parser.description = (
        'Shows boxplot of given score for different values of selected '
        'parameters. Can be used to compare scores by audioproc_f flag')

    parser.add_argument('-v',
                        '--eval_score',
                        required=True,
                        help=('Score name for constructing boxplots'))

    parser.add_argument(
        '-n',
        '--config_dir',
        required=False,
        help=('path to the folder with the configuration files'),
        default='apm_configs')

    parser.add_argument('-z',
                        '--params_to_plot',
                        required=True,
                        nargs='+',
                        help=('audioproc_f parameter values '
                              'by which to group scores (no leading dash)'))

    return parser
59
60
def FilterScoresByParams(data_frame, filter_params, score_name, config_dir):
    """Groups the `score_name` scores by the value of an APM config parameter.

    For every distinct `apm_config` found in `data_frame`, the file
    `<config_dir>/<apm_config>.json` is loaded, the single parameter of
    `filter_params` that it defines is identified, and the scores of that
    config are stored under the value the config assigns to that parameter.

    Args:
      data_frame: pandas.DataFrame of all used input data. The columns
        `apm_config`, `eval_score_name`, `capture` and `score` are read.

      filter_params: parameter names without the leading dash. Each config of
        the input data must define exactly one parameter from
        `filter_params`. Every value of the parameters in `filter_params` is
        a key in the returned dict.

      score_name: Name of score which value is boxplotted. Currently cannot
        do more than one value.

      config_dir: path to dir with APM configs.

    Returns:
      A collections.defaultdict of the form
      {param_value: {capture_name: {param_name: score}}} where `score` is a
      float. Only parameter values that have at least one score are present.

    Raises:
      ValueError: if a config defines none or more than one of the
        parameters in `filter_params`, or if a config holds more than one
        `score_name` score for the same capture (the result can store a
        single score per capture and parameter).
    """
    results = collections.defaultdict(dict)
    config_names = data_frame['apm_config'].drop_duplicates().values.tolist()

    for config_name in config_names:
        config_json = data_access.AudioProcConfigFile.Load(
            os.path.join(config_dir, config_name + '.json'))
        data_with_config = data_frame[data_frame.apm_config == config_name]
        data_cell_scores = data_with_config[data_with_config.eval_score_name ==
                                            score_name]

        # Exactly one of `filter_params` must match; fail with a message that
        # names the offending config instead of an opaque unpacking error.
        matching_params = [x for x in filter_params if '-' + x in config_json]
        if len(matching_params) != 1:
            raise ValueError(
                'APM config "%s" must define exactly one of the parameters '
                '%s, but defines %s' %
                (config_name, list(filter_params), matching_params))
        matching_param = matching_params[0]
        param_value = config_json['-' + matching_param]

        # One score per capture is expected. Converting a multi-row selection
        # to a single float is not possible, so reject duplicates explicitly.
        captures = data_cell_scores.capture
        repeated_captures = captures[captures.duplicated()]
        if not repeated_captures.empty:
            raise ValueError(
                'APM config "%s" has more than one "%s" score for capture(s) '
                '%s' % (config_name, score_name,
                        sorted(set(repeated_captures.tolist()))))

        # Add scores for every track to the result. Rows are walked once
        # (no per-capture re-filtering) and each scalar is converted on its
        # own, which avoids the deprecated float(<Series>) conversion.
        for capture_name, score in zip(captures, data_cell_scores.score):
            # Looked up inside the loop on purpose: a config without scores
            # must not create an empty entry in the defaultdict.
            config_dict = results[param_value]
            config_dict.setdefault(capture_name,
                                   {})[matching_param] = float(score)

    return results
107
108
def _FlattenToScoresList(config_param_score_dict):
    """Collects every score stored in a two-level capture/param mapping.

    Args:
      config_param_score_dict: mapping of the form
        {'capture_name': {'param_name': score_value, ...}, ...}.

    Returns:
      A plain list with all the score values found in the input, captures
      visited in the mapping's iteration order and, within a capture, scores
      in the inner mapping's iteration order.
    """
    return [
        score_value
        for scores_by_param in config_param_score_dict.values()
        for score_value in scores_by_param.values()
    ]
123
124
def main():
    """Parses the command line, gathers the scores and shows the boxplots.

    One box is drawn per value of the parameters given via --params_to_plot,
    with the boxes ordered by parameter value.
    """
    # Init.
    # TODO(alessiob): INFO once debugged.
    logging.basicConfig(level=logging.DEBUG)
    args = InstanceArgumentsParser().parse_args()

    # Get the scores.
    src_path = collect_data.ConstructSrcPath(args)
    logging.debug(src_path)
    scores_data_frame = collect_data.FindScores(src_path, args)

    # Group the data by the value of the `args.params_to_plot` parameters.
    scores_by_param_value = FilterScoresByParams(scores_data_frame,
                                                 args.params_to_plot,
                                                 args.eval_score,
                                                 args.config_dir)

    # Build the tick labels and the per-box score lists in sorted order.
    box_labels = []
    box_scores = []
    for param_value, capture_scores in sorted(scores_by_param_value.items()):
        box_labels.append(param_value)
        box_scores.append(_FlattenToScoresList(capture_scores))

    _, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
    axes.boxplot(box_scores, labels=box_labels)
    axes.set_ylabel(args.eval_score)
    axes.set_xlabel('/'.join(args.params_to_plot))
    plt.show()
151
152
# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
155