xref: /aosp_15_r20/external/toolchain-utils/crosperf/results_organizer.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li# -*- coding: utf-8 -*-
2*760c253cSXin Li# Copyright 2013 The ChromiumOS Authors
3*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
4*760c253cSXin Li# found in the LICENSE file.
5*760c253cSXin Li
6*760c253cSXin Li"""Parse data from benchmark_runs for tabulator."""
7*760c253cSXin Li
8*760c253cSXin Li
9*760c253cSXin Liimport errno
10*760c253cSXin Liimport json
11*760c253cSXin Liimport os
12*760c253cSXin Liimport re
13*760c253cSXin Liimport sys
14*760c253cSXin Li
15*760c253cSXin Lifrom cros_utils import misc
16*760c253cSXin Li
17*760c253cSXin Li
18*760c253cSXin Li_TELEMETRY_RESULT_DEFAULTS_FILE = "default-telemetry-results.json"
19*760c253cSXin Li_DUP_KEY_REGEX = re.compile(r"(\w+)\{(\d+)\}")
20*760c253cSXin Li
21*760c253cSXin Li
22*760c253cSXin Lidef _AdjustIteration(benchmarks, max_dup, bench):
23*760c253cSXin Li    """Adjust the interation numbers if they have keys like ABCD{i}."""
24*760c253cSXin Li    for benchmark in benchmarks:
25*760c253cSXin Li        if benchmark.name != bench or benchmark.iteration_adjusted:
26*760c253cSXin Li            continue
27*760c253cSXin Li        benchmark.iteration_adjusted = True
28*760c253cSXin Li        benchmark.iterations *= max_dup + 1
29*760c253cSXin Li
30*760c253cSXin Li
def _GetMaxDup(data):
    """Find the maximum i inside ABCD{i}.

    data should be a [[[Key]]], where Key is a string that may look like
    ABCD{i}.  Returns 0 when no key carries a {i} suffix.
    """
    matches = (
        _DUP_KEY_REGEX.match(key)
        for label in data
        for run in label
        for key in run
    )
    return max(
        (int(m.group(2)) for m in matches if m),
        default=0,
    )
45*760c253cSXin Li
46*760c253cSXin Li
47*760c253cSXin Lidef _Repeat(func, times):
48*760c253cSXin Li    """Returns the result of running func() n times."""
49*760c253cSXin Li    return [func() for _ in range(times)]
50*760c253cSXin Li
51*760c253cSXin Li
52*760c253cSXin Lidef _DictWithReturnValues(retval, pass_fail):
53*760c253cSXin Li    """Create a new dictionary pre-populated with success/fail values."""
54*760c253cSXin Li    new_dict = {}
55*760c253cSXin Li    # Note: 0 is a valid retval; test to make sure it's not None.
56*760c253cSXin Li    if retval is not None:
57*760c253cSXin Li        new_dict["retval"] = retval
58*760c253cSXin Li    if pass_fail:
59*760c253cSXin Li        new_dict[""] = pass_fail
60*760c253cSXin Li    return new_dict
61*760c253cSXin Li
62*760c253cSXin Li
def _GetNonDupLabel(max_dup, runs):
    """Expand duplicated keys (e.g. foo{1}) into separate runs.

    For each run, `max_dup` extra runs are created, each pre-seeded with
    the original run's retval and pass/fail marker.  A key of the form
    name{i} is stripped of its suffix and stored (as a string) in the
    (i-1)th extra run; all other keys stay with the original run.

    NOTE(review): indices are effectively assumed to start at 1 -- an
    index of 0 wraps to the *last* extra run via negative indexing.

    Returns the transformed original runs and all extra runs, concatenated.
    """
    expanded = []
    for run in runs:
        retval = run.get("retval", None)
        pass_fail = run.get("", None)
        base_run = {}
        # pylint: disable=cell-var-from-loop
        extra_runs = _Repeat(
            lambda: _DictWithReturnValues(retval, pass_fail), max_dup
        )
        for key, value in run.items():
            match = _DUP_KEY_REGEX.match(key)
            if match:
                stripped_key, index = match.groups()
                extra_runs[int(index) - 1][stripped_key] = str(value)
            else:
                base_run[key] = value
        expanded.append(base_run)
        expanded.extend(extra_runs)
    return expanded
94*760c253cSXin Li
95*760c253cSXin Li
def _DuplicatePass(result, benchmarks):
    """Properly expands keys like `foo{1}` in `result`."""
    for bench_name, label_data in result.items():
        dup_count = _GetMaxDup(label_data)
        # No foo{i}-style keys means nothing to expand for this benchmark.
        if not dup_count:
            continue
        # Replace each label's run list in place with its expanded form.
        label_data[:] = [
            _GetNonDupLabel(dup_count, runs) for runs in label_data
        ]
        _AdjustIteration(benchmarks, dup_count, bench_name)
106*760c253cSXin Li
107*760c253cSXin Li
def _ReadSummaryFile(filename):
    """Reads the summary file next to filename.

    Returns a dict whose keys are benchmark names and whose values are
    lists of the result fields that should appear in a 'default' report.
    Returns {} when no summary file exists.
    """
    dirname, _ = misc.GetRoot(filename)
    fullname = os.path.join(dirname, _TELEMETRY_RESULT_DEFAULTS_FILE)
    try:
        # Slurp the summary file into a dictionary. The keys in the dictionary
        # are the benchmark names. The value for a key is a list containing the
        # names of all the result fields that should be returned in a 'default'
        # report.
        with open(fullname) as in_file:
            return json.load(in_file)
    except FileNotFoundError:
        # FileNotFoundError is exactly IOError-with-ENOENT, so a missing
        # defaults file means "no defaults"; any other I/O error propagates.
        return {}
123*760c253cSXin Li
124*760c253cSXin Li
def _MakeOrganizeResultOutline(benchmark_runs, labels):
    """Creates the "outline" of the OrganizeResults result for a set of runs.

    Report generation returns lists of different sizes, depending on the input
    data. Depending on the order in which we iterate through said input data,
    we may populate the Nth index of a list, then the N-1st, then the N+Mth...

    It's cleaner to figure out the "skeleton"/"outline" ahead of time, so we
    don't have to worry about resizing while computing results.
    """
    # Highest iteration number seen per benchmark.  We can't simply count
    # runs, since the set of iterations may be sparse (e.g.
    # [r.iteration for r in benchmark_runs] == [1, 3]); iterations are
    # 1-based, so the maximum is also the slot count we need.
    max_iteration = {}
    for run in benchmark_runs:
        bench_name = run.benchmark.name
        if run.iteration > max_iteration.get(bench_name, -1):
            max_iteration[bench_name] = run.iteration

    # Result structure: {benchmark_name: [[{key: val}]]} -- one inner list
    # per label, one fresh dict per iteration slot.
    outline = {}
    num_labels = len(labels)
    for run in benchmark_runs:
        bench_name = run.benchmark.name
        slots = max_iteration[bench_name]
        outline[bench_name] = [
            [{} for _ in range(slots)] for _ in range(num_labels)
        ]
    return outline
155*760c253cSXin Li
156*760c253cSXin Li
def OrganizeResults(benchmark_runs, labels, benchmarks=None, json_report=False):
    """Create a dict from benchmark_runs.

    The structure of the output dict is as follows:
    {"benchmark_1":[
      [{"key1":"v1", "key2":"v2"},{"key1":"v1", "key2":"v2"}]
      #one label
      []
      #the other label
      ]
     "benchmark_2":
      [
      ]}.
    """
    result = _MakeOrganizeResultOutline(benchmark_runs, labels)
    label_names = [label.name for label in labels]
    label_indices = {name: i for i, name in enumerate(label_names)}
    # Default-report field lists, keyed by benchmark name; {} if the
    # defaults file next to this script (located via sys.argv[0]) is absent.
    summary_file = _ReadSummaryFile(sys.argv[0])

    if benchmarks is None:
        benchmarks = []

    for benchmark_run in benchmark_runs:
        # Runs with no result object contribute nothing to the report.
        if not benchmark_run.result:
            continue
        benchmark = benchmark_run.benchmark
        label_index = label_indices[benchmark_run.label.name]
        cur_label_list = result[benchmark.name][label_index]
        # run.iteration is 1-based; the outline lists are 0-based.
        cur_dict = cur_label_list[benchmark_run.iteration - 1]

        show_all_results = json_report or benchmark.show_all_results
        if not show_all_results:
            summary_list = summary_file.get(benchmark.name)
            if summary_list:
                # Always report retval and cpu frequency/temperature keys in
                # addition to whatever the defaults file asks for.  (This
                # mutates the list held by summary_file, so later runs of the
                # same benchmark see the already-extended list.)
                for key in benchmark_run.result.keyvals.keys():
                    if any(
                        key.startswith(added_key)
                        for added_key in ["retval", "cpufreq", "cputemp"]
                    ):
                        summary_list.append(key)
            else:
                # Did not find test_name in json file; show everything.
                show_all_results = True
        if benchmark_run.result.cwp_dso:
            # If we are in cwp approximation mode, we only care about samples
            if "samples" in benchmark_run.result.keyvals:
                cur_dict["samples"] = benchmark_run.result.keyvals["samples"]
            cur_dict["retval"] = benchmark_run.result.keyvals["retval"]
            # cpufreq/cputemp diagnostics are still carried along.
            for key, value in benchmark_run.result.keyvals.items():
                if any(
                    key.startswith(cpustat_keyword)
                    for cpustat_keyword in ["cpufreq", "cputemp"]
                ):
                    cur_dict[key] = value
        else:
            # Copy either everything, or only the keys in summary_list.
            for test_key in benchmark_run.result.keyvals:
                if show_all_results or test_key in summary_list:
                    cur_dict[test_key] = benchmark_run.result.keyvals[test_key]
        # Occasionally Telemetry tests will not fail but they will not return a
        # result, either.  Look for those cases, and force them to be a fail.
        # (This can happen if, for example, the test has been disabled.)
        # NOTE(review): this assumes a lone entry in cur_dict is always
        # "retval"; a single non-retval key would raise KeyError -- confirm.
        if len(cur_dict) == 1 and cur_dict["retval"] == 0:
            cur_dict["retval"] = 1
            benchmark_run.result.keyvals["retval"] = 1
            # TODO: This output should be sent via logger.
            print(
                "WARNING: Test '%s' appears to have succeeded but returned"
                " no results." % benchmark.name,
                file=sys.stderr,
            )
        if json_report and benchmark_run.machine:
            # JSON reports also record which machine produced each iteration.
            cur_dict["machine"] = benchmark_run.machine.name
            cur_dict["machine_checksum"] = benchmark_run.machine.checksum
            cur_dict["machine_string"] = benchmark_run.machine.checksum_string
    # Expand any foo{i}-style duplicate keys into separate iterations.
    _DuplicatePass(result, benchmarks)
    return result
233